PNG 2024-09-12 (a7e0c0c8)

Run the UpdateFromUpstream.sh script to extract upstream PNG using the following shell commands. $ git archive --prefix=upstream-png/ a7e0c0c8 -- png*.c png*.h arm/* LICENSE scripts/pnglibconf.h.prebuilt | tar x $ git shortlog --perl-regexp --author='^((?!Kitware Robot).*)$' --no-merges --abbrev=8 --format='%h %s' 7a614829..a7e0c0c8 Ben Wagner (1): 40878fd6 fix: Restore STDERR in pngtest.c Benjamin Buch (2): 7b888092 build: Mark the installed libpng headers as system headers in CMake 0e204b73 build: Add a CMake config file compatible with the FindPNG module Cosmin Truta (35): 8120345c ci: Update (again) the ci_verify_*.sh scripts; update .shellcheckrc 1cdde118 build: Update the makefiles for the benefit of cross-compilation 63c715b7 ci: Remove the workaround for CI_AR from ci_verify_makefiles.sh fb65436e ci: Fix the check of `PNG_LIBPNG_VER_BUILD` in ci_verify_version.sh f74d5ecc ci: Pacify shellcheck version 0.8 and apply other linting improvements 42c8fcbf Add a GitHub Action for linting 4191872d chore: Update the .editorconfig files and pacify editorconfig-checker 0fa3c0f6 chore: Clean up the spurious uses of `sizeof(png_byte)`; fix the manual 72c4520d ci: Allow the user to force an in-tree cleanup before verification dddaf0c6 ci: Fix the reporting in ci_lint.sh 6b5a2da0 Fix "ci: Fix the reporting in ci_lint.sh" aa95dee6 build: Update the CMake build options PNG_TOOLS and PNG_FRAMEWORK d165a20a build: Improve the search for an AWK processor in the CMake build 29e31f62 build: Add an explicit declaration of the AWK variable to configure.ac 14a348dd build: Checking for compiler support of LoongArch LSX should be guarded ec2e58c1 pngexif: Import pngexifinfo as an externally-contributed project e05ebfba doc: Update the README file 3b9a73ed doc: Review the libpng history and update scripts/cmake/AUTHORS.md 89023102 chore: Delete comments and config settings and stuff from here and there 80691b9d test: Fix a compiler warning in pngtest.c e1fa61da ci: Add the libpng release tags to the list of exclusions ed217e3e Release libpng version 1.6.43 f1848a3b Bump version to 1.6.44.git e4a31f02 arm: Add a placeholder file in lieu of the former `filter_neon.S` 532fec02 ci: Fix the verification of the msys2 toolchain on AppVeyor CI 33ef48b6 cmake: Fix the handling of PNG_HARDWARE_OPTIMIZATIONS on FreeBSD/amd64 7e18d142 ci: Add the targets/ subdirectory to facilitate cross-platform testing 22208658 chore: Pacify editorconfig-checker version 3.0 fcdec9c6 chore: Delete contrib/tools/chkfmt.sh 53a7f4e3 ci: Simplify the Travis CI configuration matrix 1964d560 Deprecate PNGARG and remove all of its remaining uses 843dbb75 Revert "cmake: Fix an error in the declaration of target include directories" 3117b5f9 oss-fuzz: Update the README file, the Docker file and the build script 88ab4f59 chore: Rerun `./autogen.sh --maintainer` f5e92d76 Release libpng version 1.6.44 Dan Rosser (1): e7ba9c0d build: Fix a CMake build regression introduced in version 1.6.41 Eric Riff (1): 43d6ad3e cmake: Honor CMAKE_SYSROOT if set Green Sky (2): 1d1cc9ae cmake: Fix an error in the declaration of target include directories 8cc22a8c cmake: Fix an error in the declaration of target include directories John Bowler (3): ceed2a3c SECURITY: disable build of filter_neon.S on arm 20f819c2 fix: Remove cHRM check to accomodate ACES AP1 5a7e87fc fix: Prevent overflow in chromaticity calculations Mikhail Khachayants (1): 68ba4f1f oss-fuzz: Add fuzzing targets for simplified READ API Ross Burton (1): 9e538750 arm: Remove obsolete assembler implementation filter_neon.S Change-Id: I5a75ec7e571b5b316397bd60c5d4e1038945b447
bradking · Nov 18, 2024 · 2b57d16 · 2b57d16
1 parent 55292c9
commit 2b57d16
Show file tree

Hide file tree

Showing 9 changed files with 144 additions and 369 deletions.
diff --git a/arm/filter_neon.S b/arm/filter_neon.S
@@ -1,253 +1,61 @@
 
-/* filter_neon.S - NEON optimised filter functions
+/* filter_neon.S - placeholder file
  *
- * Copyright (c) 2018 Cosmin Truta
- * Copyright (c) 2014,2017 Glenn Randers-Pehrson
- * Written by Mans Rullgard, 2011.
+ * Copyright (c) 2024 Cosmin Truta
  *
  * This code is released under the libpng license.
  * For conditions of distribution and use, see the disclaimer
  * and license in png.h
  */
 
+/* IMPORTANT NOTE:
+ *
+ * Historically, the hand-coded assembler implementation of Neon optimizations
+ * in this module had not been in sync with the intrinsics-based implementation
+ * in filter_neon_intrinsics.c and palette_neon_intrinsics.c, at least since
+ * the introduction of riffled palette optimizations. Moreover, the assembler
+ * code used to work on 32-bit ARM only, and it caused problems, even if empty,
+ * on 64-bit ARM.
+ *
+ * All references to this module from our internal build scripts and projects
+ * have been removed.
+ *
+ * For the external projects that might still expect this module to be present,
+ * we leave this stub in place, for the remaining lifetime of libpng-1.6.x.
+ * Everything should continue to function normally, as long as there are no
+ * deliberate attempts to use the old hand-made assembler code. A build error
+ * will be raised otherwise.
+ */
+
 /* This is required to get the symbol renames, which are #defines, and the
  * definitions (or not) of PNG_ARM_NEON_OPT and PNG_ARM_NEON_IMPLEMENTATION.
  */
 #define PNG_VERSION_INFO_ONLY
 #include "../pngpriv.h"
 
-#if (defined(__linux__) || defined(__FreeBSD__)) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
-#endif
-
 #ifdef PNG_READ_SUPPORTED
-
-/* Assembler NEON support - only works for 32-bit ARM (i.e. it does not work for
- * ARM64).  The code in arm/filter_neon_intrinsics.c supports ARM64, however it
- * only works if -mfpu=neon is specified on the GCC command line.  See pngpriv.h
- * for the logic which sets PNG_USE_ARM_NEON_ASM:
- */
 #if PNG_ARM_NEON_IMPLEMENTATION == 2 /* hand-coded assembler */
-
 #if PNG_ARM_NEON_OPT > 0
 
-#ifdef __ELF__
-#   define ELF
+#if defined(__clang__)
+#define GNUC_VERSION 0 /* not gcc, although it might pretend to be */
+#elif defined(__GNUC__)
+#define GNUC_MAJOR (__GNUC__ + 0)
+#define GNUC_MINOR (__GNUC_MINOR__ + 0)
+#define GNUC_PATCHLEVEL (__GNUC_PATCHLEVEL__ + 0)
+#define GNUC_VERSION (GNUC_MAJOR * 10000 + GNUC_MINOR * 100 + GNUC_PATCHLEVEL)
 #else
-#   define ELF @
+#define GNUC_VERSION 0 /* not gcc */
 #endif
 
-        .arch armv7-a
-        .fpu  neon
-
-.macro  func    name, export=0
-    .macro endfunc
-ELF     .size   \name, . - \name
-        .endfunc
-        .purgem endfunc
-    .endm
-        .text
-
-        /* Explicitly specifying alignment here because some versions of
-         * GAS don't align code correctly.  This is harmless in correctly
-         * written versions of GAS.
-         */
-        .align 2
-
-    .if \export
-        .global \name
-    .endif
-ELF     .type   \name, STT_FUNC
-        .func   \name
-\name:
-.endm
-
-func    png_read_filter_row_sub4_neon, export=1
-        ldr             r3,  [r0, #4]           @ rowbytes
-        vmov.i8         d3,  #0
-1:
-        vld4.32         {d4[],d5[],d6[],d7[]},    [r1,:128]
-        vadd.u8         d0,  d3,  d4
-        vadd.u8         d1,  d0,  d5
-        vadd.u8         d2,  d1,  d6
-        vadd.u8         d3,  d2,  d7
-        vst4.32         {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
-        subs            r3,  r3,  #16
-        bgt             1b
-
-        bx              lr
-endfunc
-
-func    png_read_filter_row_sub3_neon, export=1
-        ldr             r3,  [r0, #4]           @ rowbytes
-        vmov.i8         d3,  #0
-        mov             r0,  r1
-        mov             r2,  #3
-        mov             r12, #12
-        vld1.8          {q11},    [r0], r12
-1:
-        vext.8          d5,  d22, d23, #3
-        vadd.u8         d0,  d3,  d22
-        vext.8          d6,  d22, d23, #6
-        vadd.u8         d1,  d0,  d5
-        vext.8          d7,  d23, d23, #1
-        vld1.8          {q11},    [r0], r12
-        vst1.32         {d0[0]},  [r1,:32], r2
-        vadd.u8         d2,  d1,  d6
-        vst1.32         {d1[0]},  [r1], r2
-        vadd.u8         d3,  d2,  d7
-        vst1.32         {d2[0]},  [r1], r2
-        vst1.32         {d3[0]},  [r1], r2
-        subs            r3,  r3,  #12
-        bgt             1b
-
-        bx              lr
-endfunc
-
-func    png_read_filter_row_up_neon, export=1
-        ldr             r3,  [r0, #4]           @ rowbytes
-1:
-        vld1.8          {q0}, [r1,:128]
-        vld1.8          {q1}, [r2,:128]!
-        vadd.u8         q0,  q0,  q1
-        vst1.8          {q0}, [r1,:128]!
-        subs            r3,  r3,  #16
-        bgt             1b
-
-        bx              lr
-endfunc
-
-func    png_read_filter_row_avg4_neon, export=1
-        ldr             r12, [r0, #4]           @ rowbytes
-        vmov.i8         d3,  #0
-1:
-        vld4.32         {d4[],d5[],d6[],d7[]},    [r1,:128]
-        vld4.32         {d16[],d17[],d18[],d19[]},[r2,:128]!
-        vhadd.u8        d0,  d3,  d16
-        vadd.u8         d0,  d0,  d4
-        vhadd.u8        d1,  d0,  d17
-        vadd.u8         d1,  d1,  d5
-        vhadd.u8        d2,  d1,  d18
-        vadd.u8         d2,  d2,  d6
-        vhadd.u8        d3,  d2,  d19
-        vadd.u8         d3,  d3,  d7
-        vst4.32         {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
-        subs            r12, r12, #16
-        bgt             1b
-
-        bx              lr
-endfunc
-
-func    png_read_filter_row_avg3_neon, export=1
-        push            {r4,lr}
-        ldr             r12, [r0, #4]           @ rowbytes
-        vmov.i8         d3,  #0
-        mov             r0,  r1
-        mov             r4,  #3
-        mov             lr,  #12
-        vld1.8          {q11},    [r0], lr
-1:
-        vld1.8          {q10},    [r2], lr
-        vext.8          d5,  d22, d23, #3
-        vhadd.u8        d0,  d3,  d20
-        vext.8          d17, d20, d21, #3
-        vadd.u8         d0,  d0,  d22
-        vext.8          d6,  d22, d23, #6
-        vhadd.u8        d1,  d0,  d17
-        vext.8          d18, d20, d21, #6
-        vadd.u8         d1,  d1,  d5
-        vext.8          d7,  d23, d23, #1
-        vld1.8          {q11},    [r0], lr
-        vst1.32         {d0[0]},  [r1,:32], r4
-        vhadd.u8        d2,  d1,  d18
-        vst1.32         {d1[0]},  [r1], r4
-        vext.8          d19, d21, d21, #1
-        vadd.u8         d2,  d2,  d6
-        vhadd.u8        d3,  d2,  d19
-        vst1.32         {d2[0]},  [r1], r4
-        vadd.u8         d3,  d3,  d7
-        vst1.32         {d3[0]},  [r1], r4
-        subs            r12, r12, #12
-        bgt             1b
-
-        pop             {r4,pc}
-endfunc
-
-.macro  paeth           rx,  ra,  rb,  rc
-        vaddl.u8        q12, \ra, \rb           @ a + b
-        vaddl.u8        q15, \rc, \rc           @ 2*c
-        vabdl.u8        q13, \rb, \rc           @ pa
-        vabdl.u8        q14, \ra, \rc           @ pb
-        vabd.u16        q15, q12, q15           @ pc
-        vcle.u16        q12, q13, q14           @ pa <= pb
-        vcle.u16        q13, q13, q15           @ pa <= pc
-        vcle.u16        q14, q14, q15           @ pb <= pc
-        vand            q12, q12, q13           @ pa <= pb && pa <= pc
-        vmovn.u16       d28, q14
-        vmovn.u16       \rx, q12
-        vbsl            d28, \rb, \rc
-        vbsl            \rx, \ra, d28
-.endm
-
-func    png_read_filter_row_paeth4_neon, export=1
-        ldr             r12, [r0, #4]           @ rowbytes
-        vmov.i8         d3,  #0
-        vmov.i8         d20, #0
-1:
-        vld4.32         {d4[],d5[],d6[],d7[]},    [r1,:128]
-        vld4.32         {d16[],d17[],d18[],d19[]},[r2,:128]!
-        paeth           d0,  d3,  d16, d20
-        vadd.u8         d0,  d0,  d4
-        paeth           d1,  d0,  d17, d16
-        vadd.u8         d1,  d1,  d5
-        paeth           d2,  d1,  d18, d17
-        vadd.u8         d2,  d2,  d6
-        paeth           d3,  d2,  d19, d18
-        vmov            d20, d19
-        vadd.u8         d3,  d3,  d7
-        vst4.32         {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
-        subs            r12, r12, #16
-        bgt             1b
-
-        bx              lr
-endfunc
-
-func    png_read_filter_row_paeth3_neon, export=1
-        push            {r4,lr}
-        ldr             r12, [r0, #4]           @ rowbytes
-        vmov.i8         d3,  #0
-        vmov.i8         d4,  #0
-        mov             r0,  r1
-        mov             r4,  #3
-        mov             lr,  #12
-        vld1.8          {q11},    [r0], lr
-1:
-        vld1.8          {q10},    [r2], lr
-        paeth           d0,  d3,  d20, d4
-        vext.8          d5,  d22, d23, #3
-        vadd.u8         d0,  d0,  d22
-        vext.8          d17, d20, d21, #3
-        paeth           d1,  d0,  d17, d20
-        vst1.32         {d0[0]},  [r1,:32], r4
-        vext.8          d6,  d22, d23, #6
-        vadd.u8         d1,  d1,  d5
-        vext.8          d18, d20, d21, #6
-        paeth           d2,  d1,  d18, d17
-        vext.8          d7,  d23, d23, #1
-        vld1.8          {q11},    [r0], lr
-        vst1.32         {d1[0]},  [r1], r4
-        vadd.u8         d2,  d2,  d6
-        vext.8          d19, d21, d21, #1
-        paeth           d3,  d2,  d19, d18
-        vst1.32         {d2[0]},  [r1], r4
-        vmov            d4,  d19
-        vadd.u8         d3,  d3,  d7
-        vst1.32         {d3[0]},  [r1], r4
-        subs            r12, r12, #12
-        bgt             1b
+#if (GNUC_VERSION > 0) && (GNUC_VERSION < 40300)
+#error "PNG_ARM_NEON is not supported with gcc versions earlier than 4.3.0"
+#elif GNUC_VERSION == 40504
+#error "PNG_ARM_NEON is not supported with gcc version 4.5.4"
+#else
+#error "Please use 'arm/*_neon_intrinsics.c' for PNG_ARM_NEON support"
+#endif
 
-        pop             {r4,pc}
-endfunc
 #endif /* PNG_ARM_NEON_OPT > 0 */
-#endif /* PNG_ARM_NEON_IMPLEMENTATION == 2 (assembler) */
+#endif /* PNG_ARM_NEON_IMPLEMENTATION == 2 */
 #endif /* READ */