diff --git a/CMakeLists.txt b/CMakeLists.txt index e0d0369501..109ce203eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -134,188 +134,6 @@ else() # libm is not available or not needed. endif() -if(PNG_HARDWARE_OPTIMIZATIONS) - -# Set definitions and sources for ARM. -if(PNG_TARGET_ARCHITECTURE MATCHES "^(arm|aarch)") - if(PNG_TARGET_ARCHITECTURE MATCHES "^(arm64|aarch64)") - set(PNG_ARM_NEON_POSSIBLE_VALUES on off) - set(PNG_ARM_NEON "on" - CACHE STRING "Enable ARM NEON optimizations: on|off; on is default") - else() - set(PNG_ARM_NEON_POSSIBLE_VALUES check on off) - set(PNG_ARM_NEON "off" - CACHE STRING "Enable ARM NEON optimizations: check|on|off; off is default") - endif() - set_property(CACHE PNG_ARM_NEON - PROPERTY STRINGS ${PNG_ARM_NEON_POSSIBLE_VALUES}) - list(FIND PNG_ARM_NEON_POSSIBLE_VALUES ${PNG_ARM_NEON} index) - if(index EQUAL -1) - message(FATAL_ERROR "PNG_ARM_NEON must be one of [${PNG_ARM_NEON_POSSIBLE_VALUES}]") - elseif(NOT PNG_ARM_NEON STREQUAL "off") - set(libpng_arm_sources - arm/arm_init.c - arm/filter_neon_intrinsics.c - arm/palette_neon_intrinsics.c) - if(PNG_ARM_NEON STREQUAL "on") - add_definitions(-DPNG_ARM_NEON_OPT=2) - elseif(PNG_ARM_NEON STREQUAL "check") - add_definitions(-DPNG_ARM_NEON_CHECK_SUPPORTED) - endif() - else() - add_definitions(-DPNG_ARM_NEON_OPT=0) - endif() -endif() - -# Set definitions and sources for PowerPC. 
-if(PNG_TARGET_ARCHITECTURE MATCHES "^(powerpc|ppc64)") - set(PNG_POWERPC_VSX_POSSIBLE_VALUES on off) - set(PNG_POWERPC_VSX "on" - CACHE STRING "Enable POWERPC VSX optimizations: on|off; on is default") - set_property(CACHE PNG_POWERPC_VSX - PROPERTY STRINGS ${PNG_POWERPC_VSX_POSSIBLE_VALUES}) - list(FIND PNG_POWERPC_VSX_POSSIBLE_VALUES ${PNG_POWERPC_VSX} index) - if(index EQUAL -1) - message(FATAL_ERROR "PNG_POWERPC_VSX must be one of [${PNG_POWERPC_VSX_POSSIBLE_VALUES}]") - elseif(NOT PNG_POWERPC_VSX STREQUAL "off") - set(libpng_powerpc_sources - powerpc/powerpc_init.c - powerpc/filter_vsx_intrinsics.c) - if(PNG_POWERPC_VSX STREQUAL "on") - add_definitions(-DPNG_POWERPC_VSX_OPT=2) - endif() - else() - add_definitions(-DPNG_POWERPC_VSX_OPT=0) - endif() -endif() - -# Set definitions and sources for Intel. -if(PNG_TARGET_ARCHITECTURE MATCHES "^(i[3-6]86|x86|amd64)") - set(PNG_INTEL_SSE_POSSIBLE_VALUES on off) - set(PNG_INTEL_SSE "on" - CACHE STRING "Enable INTEL_SSE optimizations: on|off; on is default") - set_property(CACHE PNG_INTEL_SSE - PROPERTY STRINGS ${PNG_INTEL_SSE_POSSIBLE_VALUES}) - list(FIND PNG_INTEL_SSE_POSSIBLE_VALUES ${PNG_INTEL_SSE} index) - if(index EQUAL -1) - message(FATAL_ERROR "PNG_INTEL_SSE must be one of [${PNG_INTEL_SSE_POSSIBLE_VALUES}]") - elseif(NOT PNG_INTEL_SSE STREQUAL "off") - set(libpng_intel_sources - intel/intel_init.c - intel/filter_sse2_intrinsics.c) - if(PNG_INTEL_SSE STREQUAL "on") - add_definitions(-DPNG_INTEL_SSE_OPT=1) - endif() - else() - add_definitions(-DPNG_INTEL_SSE_OPT=0) - endif() -endif() - -# Set definitions and sources for MIPS. 
-if(PNG_TARGET_ARCHITECTURE MATCHES "^(mipsel|mips64el)") - set(PNG_MIPS_MSA_POSSIBLE_VALUES on off) - set(PNG_MIPS_MSA "on" - CACHE STRING "Enable MIPS_MSA optimizations: on|off; on is default") - set_property(CACHE PNG_MIPS_MSA - PROPERTY STRINGS ${PNG_MIPS_MSA_POSSIBLE_VALUES}) - list(FIND PNG_MIPS_MSA_POSSIBLE_VALUES ${PNG_MIPS_MSA} index_msa) - if(index_msa EQUAL -1) - message(FATAL_ERROR "PNG_MIPS_MSA must be one of [${PNG_MIPS_MSA_POSSIBLE_VALUES}]") - endif() - - set(PNG_MIPS_MMI_POSSIBLE_VALUES on off) - set(PNG_MIPS_MMI "on" - CACHE STRING "Enable MIPS_MMI optimizations: on|off; on is default") - set_property(CACHE PNG_MIPS_MMI - PROPERTY STRINGS ${PNG_MIPS_MMI_POSSIBLE_VALUES}) - list(FIND PNG_MIPS_MMI_POSSIBLE_VALUES ${PNG_MIPS_MMI} index_mmi) - if(index_mmi EQUAL -1) - message(FATAL_ERROR "PNG_MIPS_MMI must be one of [${PNG_MIPS_MMI_POSSIBLE_VALUES}]") - endif() - - if(PNG_MIPS_MSA STREQUAL "on" AND PNG_MIPS_MMI STREQUAL "on") - set(libpng_mips_sources - mips/mips_init.c - mips/filter_msa_intrinsics.c - mips/filter_mmi_inline_assembly.c) - add_definitions(-DPNG_MIPS_MSA_OPT=2) - add_definitions(-DPNG_MIPS_MMI_OPT=1) - elseif(PNG_MIPS_MSA STREQUAL "on") - set(libpng_mips_sources - mips/mips_init.c - mips/filter_msa_intrinsics.c) - add_definitions(-DPNG_MIPS_MSA_OPT=2) - add_definitions(-DPNG_MIPS_MMI_OPT=0) - elseif(PNG_MIPS_MMI STREQUAL "on") - set(libpng_mips_sources - mips/mips_init.c - mips/filter_mmi_inline_assembly.c) - add_definitions(-DPNG_MIPS_MSA_OPT=0) - add_definitions(-DPNG_MIPS_MMI_OPT=1) - else() - add_definitions(-DPNG_MIPS_MSA_OPT=0) - add_definitions(-DPNG_MIPS_MMI_OPT=0) - endif() -endif() - -# Set definitions and sources for LoongArch. 
-if(PNG_TARGET_ARCHITECTURE MATCHES "^(loongarch)") - include(CheckCCompilerFlag) - set(PNG_LOONGARCH_LSX_POSSIBLE_VALUES on off) - set(PNG_LOONGARCH_LSX "on" - CACHE STRING "Enable LOONGARCH_LSX optimizations: on|off; on is default") - set_property(CACHE PNG_LOONGARCH_LSX - PROPERTY STRINGS ${PNG_LOONGARCH_LSX_POSSIBLE_VALUES}) - list(FIND PNG_LOONGARCH_LSX_POSSIBLE_VALUES ${PNG_LOONGARCH_LSX} index) - if(index EQUAL -1) - message(FATAL_ERROR "PNG_LOONGARCH_LSX must be one of [${PNG_LOONGARCH_LSX_POSSIBLE_VALUES}]") - elseif(NOT PNG_LOONGARCH_LSX STREQUAL "off") - CHECK_C_COMPILER_FLAG("-mlsx" COMPILER_SUPPORTS_LSX) - if(COMPILER_SUPPORTS_LSX) - set(libpng_loongarch_sources - loongarch/loongarch_lsx_init.c - loongarch/filter_lsx_intrinsics.c) - set_source_files_properties(${libpng_loongarch_sources} - PROPERTIES - COMPILE_FLAGS "-mlsx") - add_definitions(-DPNG_LOONGARCH_LSX_OPT=1) - else() - message(FATAL_ERROR "Compiler does not support -mlsx option") - endif() - else() - add_definitions(-DPNG_LOONGARCH_LSX_OPT=0) - endif() -endif() - -else(PNG_HARDWARE_OPTIMIZATIONS) - -# Set definitions and sources for ARM. -if(PNG_TARGET_ARCHITECTURE MATCHES "^(arm|aarch)") - add_definitions(-DPNG_ARM_NEON_OPT=0) -endif() - -# Set definitions and sources for PowerPC. -if(PNG_TARGET_ARCHITECTURE MATCHES "^(powerpc|ppc64)") - add_definitions(-DPNG_POWERPC_VSX_OPT=0) -endif() - -# Set definitions and sources for Intel. -if(PNG_TARGET_ARCHITECTURE MATCHES "^(i[3-6]86|x86|amd64)") - add_definitions(-DPNG_INTEL_SSE_OPT=0) -endif() - -# Set definitions and sources for MIPS. -if(PNG_TARGET_ARCHITECTURE MATCHES "^(mipsel|mips64el)") - add_definitions(-DPNG_MIPS_MSA_OPT=0) -endif() - -# Set definitions and sources for LoongArch. 
-if(PNG_TARGET_ARCHITECTURE MATCHES "^(loongarch)") - add_definitions(-DPNG_LOONGARCH_LSX_OPT=0) -endif() - -endif(PNG_HARDWARE_OPTIMIZATIONS) - option(ld-version-script "Enable linker version script" ON) if(ld-version-script AND NOT ANDROID AND NOT APPLE) # Check if LD supports linker scripts. @@ -628,11 +446,7 @@ set(libpng_sources pngwrite.c pngwtran.c pngwutil.c - ${libpng_arm_sources} - ${libpng_intel_sources} - ${libpng_mips_sources} - ${libpng_powerpc_sources} - ${libpng_loongarch_sources} + pngsimd.c ) set(pngtest_sources pngtest.c diff --git a/Makefile.am b/Makefile.am index eed986c2b8..d6709bfcea 100644 --- a/Makefile.am +++ b/Makefile.am @@ -104,45 +104,9 @@ lib_LTLIBRARIES=libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@.la libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c\ pngget.c pngmem.c pngpread.c pngread.c pngrio.c pngrtran.c pngrutil.c\ pngset.c pngtrans.c pngwio.c pngwrite.c pngwtran.c pngwutil.c\ + pngsimd.c\ png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa -if PNG_ARM_NEON -libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/arm_init.c\ - arm/filter_neon_intrinsics.c \ - arm/palette_neon_intrinsics.c -endif - -if PNG_MIPS_MSA -libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += mips/mips_init.c\ - mips/filter_msa_intrinsics.c -endif - -if PNG_MIPS_MMI -if !PNG_MIPS_MSA -libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += mips/mips_init.c -endif -libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += mips/filter_mmi_inline_assembly.c -endif - -if PNG_INTEL_SSE -libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += intel/intel_init.c\ - intel/filter_sse2_intrinsics.c -endif - -if PNG_POWERPC_VSX -libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += powerpc/powerpc_init.c\ - powerpc/filter_vsx_intrinsics.c -endif - -if PNG_LOONGARCH_LSX -noinst_LTLIBRARIES= libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la -libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx_la_SOURCES = loongarch/loongarch_lsx_init.c\ - loongarch/filter_lsx_intrinsics.c 
-libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx_la_CFLAGS = -mlsx -libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LIBADD = libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la -# libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES = libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la -endif - nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LDFLAGS = -no-undefined -export-dynamic \ @@ -163,10 +127,6 @@ else libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES = libpng.sym endif -if PNG_LOONGARCH_LSX - libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES += libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la -endif - #distribute headers in /usr/include/libpng/* pkgincludedir= $(includedir)/$(PNGLIB_BASENAME) pkginclude_HEADERS= png.h pngconf.h diff --git a/arm/arm_init.c b/arm/arm_init.c index 84d05556f8..44f92f7b2a 100644 --- a/arm/arm_init.c +++ b/arm/arm_init.c @@ -9,113 +9,27 @@ * For conditions of distribution and use, see the disclaimer * and license in png.h */ +#define png_target_impl "arm-neon" -/* This module requires POSIX 1003.1 functions. */ -#define _POSIX_SOURCE 1 - -#include "../pngpriv.h" - -#ifdef PNG_READ_SUPPORTED - -#if PNG_ARM_NEON_OPT > 0 -#ifdef PNG_ARM_NEON_CHECK_SUPPORTED /* Do run-time checks */ -/* WARNING: it is strongly recommended that you do not build libpng with - * run-time checks for CPU features if at all possible. In the case of the ARM - * NEON instructions there is no processor-specific way of detecting the - * presence of the required support, therefore run-time detection is extremely - * OS specific. - * - * You may set the macro PNG_ARM_NEON_FILE to the file name of file containing - * a fragment of C source code which defines the png_have_neon function. There - * are a number of implementations in contrib/arm-neon, but the only one that - * has partial support is contrib/arm-neon/linux.c - a generic Linux - * implementation which reads /proc/cpufino. 
- */ -#include <signal.h> /* for sig_atomic_t */ - -#ifndef PNG_ARM_NEON_FILE -# if defined(__aarch64__) || defined(_M_ARM64) - /* ARM Neon is expected to be unconditionally available on ARM64. */ -# error "PNG_ARM_NEON_CHECK_SUPPORTED must not be defined on ARM64" -# elif defined(__ARM_NEON__) || defined(__ARM_NEON) - /* ARM Neon is expected to be available on the target CPU architecture. */ -# error "PNG_ARM_NEON_CHECK_SUPPORTED must not be defined on this CPU arch" -# elif defined(__linux__) -# define PNG_ARM_NEON_FILE "contrib/arm-neon/linux.c" -# else -# error "No support for run-time ARM Neon checking; use compile-time options" -# endif +#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64) +# include <arm64_neon.h> +#else +# include <arm_neon.h> #endif -static int png_have_neon(png_structp png_ptr); -#ifdef PNG_ARM_NEON_FILE -# include PNG_ARM_NEON_FILE -#endif -#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */ +/* Obtain the definitions of the actual filter functions: */ +#include "filter_neon_intrinsics.c" -#ifndef PNG_ALIGNED_MEMORY_SUPPORTED -# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED" -#endif - -void +static void png_init_filter_functions_neon(png_structp pp, unsigned int bpp) { - /* The switch statement is compiled in for ARM_NEON_API, the call to - * png_have_neon is compiled in for ARM_NEON_CHECK. If both are defined - * the check is only performed if the API has not set the NEON option on - * or off explicitly. In this case the check controls what happens. - * - * If the CHECK is not compiled in and the option is UNSET the behavior prior - * to 1.6.7 was to use the NEON code - this was a bug caused by having the - * wrong order of the 'ON' and 'default' cases. 
UNSET now defaults to OFF, - * as documented in png.h - */ png_debug(1, "in png_init_filter_functions_neon"); -#ifdef PNG_ARM_NEON_API_SUPPORTED - switch ((pp->options >> PNG_ARM_NEON) & 3) - { - case PNG_OPTION_UNSET: - /* Allow the run-time check to execute if it has been enabled - - * thus both API and CHECK can be turned on. If it isn't supported - * this case will fall through to the 'default' below, which just - * returns. - */ -#endif /* PNG_ARM_NEON_API_SUPPORTED */ -#ifdef PNG_ARM_NEON_CHECK_SUPPORTED - { - static volatile sig_atomic_t no_neon = -1; /* not checked */ - - if (no_neon < 0) - no_neon = !png_have_neon(pp); - if (no_neon) - return; - } -#ifdef PNG_ARM_NEON_API_SUPPORTED - break; -#endif -#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */ - -#ifdef PNG_ARM_NEON_API_SUPPORTED - default: /* OFF or INVALID */ - return; - - case PNG_OPTION_ON: - /* Option turned on */ - break; - } -#endif - - /* IMPORTANT: any new external functions used here must be declared using - * PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the - * 'prefix' option to configure works: - * - * ./configure --with-libpng-prefix=foobar_ + /* IMPORTANT: DO NOT DEFINE EXTERNAL FUNCTIONS HERE * - * Verify you have got this right by running the above command, doing a build - * and examining pngprefix.h; it must contain a #define for every external - * function you add. (Notice that this happens automatically for the - * initialization function.) + * This is because external functions must be declared with + * PNG_INTERNAL_FUNCTION in pngpriv.h; without this the PNG_PREFIX option to + * the build will not work (it will not know about these symbols). 
*/ pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon; @@ -135,5 +49,26 @@ png_init_filter_functions_neon(png_structp pp, unsigned int bpp) png_read_filter_row_paeth4_neon; } } -#endif /* PNG_ARM_NEON_OPT > 0 */ -#endif /* READ */ + +#define png_target_init_filter_functions_impl png_init_filter_functions_neon + +#ifdef PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE /*TODO*/ +#include "palette_neon_intrinsics.c" + +/* TODO: + * png_target_free_data_impl + * Must be defined if the implementation stores data in + * png_struct::target_data. Need not be defined otherwise. + * + * png_target_init_palette_support_impl + * Contains code to initialize a palette transformation. This returns + * true if something has been set up. Only called if the state contains + * png_target_palette, need not be defined, may cancel the state flag + * in the png_struct to prevent further calls. + * + * png_target_do_expand_palette + * Handles palette expansion. Need not be defined, only called if the + * state contains png_target_palette, may set this flag to zero, may + * return false to indicate that the expansion was not done. + */ +#endif /*TODO*/ diff --git a/arm/check.h b/arm/check.h new file mode 100644 index 0000000000..e9b0696cb1 --- /dev/null +++ b/arm/check.h @@ -0,0 +1,19 @@ +/* arm/check.h - NEON optimised filter functions + * + * Copyright (c) 2018-2022 Cosmin Truta + * Copyright (c) 2014,2016 Glenn Randers-Pehrson + * Written by Mans Rullgard, 2011. + * + * This code is released under the libpng license. 
+ * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +# define PNG_TARGET_CODE_IMPLEMENTATION "arm/arm_init.c" +# define PNG_TARGET_IMPLEMENTS_FILTERS +# ifdef PNG_READ_EXPAND_SUPPORTED + /*TODO: # define PNG_TARGET_STORES_DATA */ + /*TODO: # define PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE */ +# endif /* READ_EXPAND */ +# define PNG_TARGET_ROW_ALIGNMENT 16 +#endif /* ARM_NEON */ diff --git a/arm/filter_neon_intrinsics.c b/arm/filter_neon_intrinsics.c index 4466d48b20..53d9bdb466 100644 --- a/arm/filter_neon_intrinsics.c +++ b/arm/filter_neon_intrinsics.c @@ -1,4 +1,3 @@ - /* filter_neon_intrinsics.c - NEON optimised filter functions * * Copyright (c) 2018 Cosmin Truta @@ -11,18 +10,7 @@ * and license in png.h */ -#include "../pngpriv.h" - -#ifdef PNG_READ_SUPPORTED - -/* This code requires -mfpu=neon on the command line: */ -#if PNG_ARM_NEON_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */ - -#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64) -# include <arm64_neon.h> -#else -# include <arm_neon.h> -#endif +/* [[libpng-1.8]] this file is included by arm/arm_init.c */ /* libpng row pointers are not necessarily aligned to any particular boundary, * however this code will only work with appropriate alignment. 
arm/arm_init.c @@ -45,9 +33,7 @@ #define png_ldr(type,pointer)\ (temp_pointer = png_ptr(type,pointer), *temp_pointer) -#if PNG_ARM_NEON_OPT > 0 - -void +static void png_read_filter_row_up_neon(png_row_infop row_info, png_bytep row, png_const_bytep prev_row) { @@ -68,7 +54,7 @@ png_read_filter_row_up_neon(png_row_infop row_info, png_bytep row, } } -void +static void png_read_filter_row_sub3_neon(png_row_infop row_info, png_bytep row, png_const_bytep prev_row) { @@ -115,7 +101,7 @@ png_read_filter_row_sub3_neon(png_row_infop row_info, png_bytep row, PNG_UNUSED(prev_row) } -void +static void png_read_filter_row_sub4_neon(png_row_infop row_info, png_bytep row, png_const_bytep prev_row) { @@ -147,7 +133,7 @@ png_read_filter_row_sub4_neon(png_row_infop row_info, png_bytep row, PNG_UNUSED(prev_row) } -void +static void png_read_filter_row_avg3_neon(png_row_infop row_info, png_bytep row, png_const_bytep prev_row) { @@ -215,7 +201,7 @@ png_read_filter_row_avg3_neon(png_row_infop row_info, png_bytep row, } } -void +static void png_read_filter_row_avg4_neon(png_row_infop row_info, png_bytep row, png_const_bytep prev_row) { @@ -284,7 +270,7 @@ paeth(uint8x8_t a, uint8x8_t b, uint8x8_t c) return e; } -void +static void png_read_filter_row_paeth3_neon(png_row_infop row_info, png_bytep row, png_const_bytep prev_row) { @@ -352,7 +338,7 @@ png_read_filter_row_paeth3_neon(png_row_infop row_info, png_bytep row, } } -void +static void png_read_filter_row_paeth4_neon(png_row_infop row_info, png_bytep row, png_const_bytep prev_row) { @@ -396,7 +382,3 @@ png_read_filter_row_paeth4_neon(png_row_infop row_info, png_bytep row, vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0); } } - -#endif /* PNG_ARM_NEON_OPT > 0 */ -#endif /* PNG_ARM_NEON_IMPLEMENTATION == 1 (intrinsics) */ -#endif /* READ */ diff --git a/arm/palette_neon_intrinsics.c b/arm/palette_neon_intrinsics.c index 92c7d6f9f6..706daef167 100644 --- a/arm/palette_neon_intrinsics.c +++ b/arm/palette_neon_intrinsics.c @@ -1,4 +1,3 @@ 
- /* palette_neon_intrinsics.c - NEON optimised palette expansion functions * * Copyright (c) 2018-2019 Cosmin Truta @@ -10,18 +9,8 @@ * and license in png.h */ -#include "../pngpriv.h" - -#if PNG_ARM_NEON_IMPLEMENTATION == 1 - -#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64) -# include -#else -# include -#endif - /* Build an RGBA8 palette from the separate RGB and alpha palettes. */ -void +static void png_riffle_palette_neon(png_structrp png_ptr) { png_const_colorp palette = png_ptr->palette; @@ -58,7 +47,7 @@ png_riffle_palette_neon(png_structrp png_ptr) } /* Expands a palettized row into RGBA8. */ -int +static int png_do_expand_palette_rgba8_neon(png_structrp png_ptr, png_row_infop row_info, png_const_bytep row, png_bytepp ssp, png_bytepp ddp) { @@ -103,7 +92,7 @@ png_do_expand_palette_rgba8_neon(png_structrp png_ptr, png_row_infop row_info, } /* Expands a palettized row into RGB8. */ -int +static int png_do_expand_palette_rgb8_neon(png_structrp png_ptr, png_row_infop row_info, png_const_bytep row, png_bytepp ssp, png_bytepp ddp) { @@ -147,5 +136,3 @@ png_do_expand_palette_rgb8_neon(png_structrp png_ptr, png_row_infop row_info, *ddp = *ddp - ((i << 1) + i); return i; } - -#endif /* PNG_ARM_NEON_IMPLEMENTATION */ diff --git a/configure.ac b/configure.ac index 2c6b3333c6..426902cb64 100644 --- a/configure.ac +++ b/configure.ac @@ -324,343 +324,14 @@ AC_ARG_ENABLE([hardware-optimizations], [Enable hardware optimizations: =no/off, yes/on.]), [case "$enableval" in no|off) - # disable hardware optimization on all systems: - enable_arm_neon=no - AC_DEFINE([PNG_ARM_NEON_OPT], [0], - [Disable ARM_NEON optimizations]) - enable_mips_msa=no - AC_DEFINE([PNG_MIPS_MSA_OPT], [0], - [Disable MIPS_MSA optimizations]) - enable_mips_mmi=no - AC_DEFINE([PNG_MIPS_MMI_OPT], [0], - [Disable MIPS_MMI optimizations]) - enable_powerpc_vsx=no - AC_DEFINE([PNG_POWERPC_VSX_OPT], [0], - [Disable POWERPC VSX optimizations]) - enable_intel_sse=no - 
AC_DEFINE([PNG_INTEL_SSE_OPT], [0], - [Disable INTEL_SSE optimizations]) - enable_loongarch_lsx=no - AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [0], - [Disable LOONGARCH_LSX optimizations]) + # disable hardware optimization on all systems + AC_DEFINE([PNG_NO_HARDWARE], [1], + [Disable hardware specific optimizations]) ;; *) - # allow enabling hardware optimization on any system: - case "$host_cpu" in - arm*|aarch64*) - enable_arm_neon=yes - AC_DEFINE([PNG_ARM_NEON_OPT], [2], - [Enable ARM_NEON optimizations]) - ;; - mipsel*|mips64el*) - enable_mips_mmi=yes - enable_mips_msa=yes - AC_DEFINE([PNG_MIPS_MMI_OPT], [1], - [Enable MIPS_MMI optimizations]) - AC_DEFINE([PNG_MIPS_MSA_OPT], [2], - [Enable MIPS_MSA optimizations]) - ;; - i?86|x86_64) - enable_intel_sse=yes - AC_DEFINE([PNG_INTEL_SSE_OPT], [1], - [Enable Intel SSE optimizations]) - ;; - powerpc*|ppc64*) - enable_powerpc_vsx=yes - AC_DEFINE([PNG_POWERPC_VSX_OPT], [2], - [Enable POWERPC VSX optimizations]) - ;; - loongarch*) - enable_loongarch_lsx=yes - AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [1], - [Enable LOONGARCH_LSX optimizations]) - ;; - esac ;; esac]) -# ARM NEON -# ======== - -AC_ARG_ENABLE([arm-neon], - AS_HELP_STRING([[[--enable-arm-neon]]], - [Enable ARM NEON optimizations: =no/off, check, api, yes/on.] - [no/off: disable the optimizations;] - [check: use internal checking code (deprecated and poorly supported);] - [api: disable by default, enable by a call to png_set_option;] - [yes/on: turn on unconditionally.] 
- [If not specified: determined by the compiler.]), - [case "$enableval" in - no|off) - # disable the default enabling on __ARM_NEON__ systems: - AC_DEFINE([PNG_ARM_NEON_OPT], [0], - [Disable ARM Neon optimizations]) - # Prevent inclusion of the assembler files below: - enable_arm_neon=no ;; - check) - AC_DEFINE([PNG_ARM_NEON_CHECK_SUPPORTED], [], - [Check for ARM Neon support at run-time]);; - api) - AC_DEFINE([PNG_ARM_NEON_API_SUPPORTED], [], - [Turn on ARM Neon optimizations at run-time]);; - yes|on) - AC_DEFINE([PNG_ARM_NEON_OPT], [2], - [Enable ARM Neon optimizations]) - AC_MSG_WARN([--enable-arm-neon: please specify 'check' or 'api';] - [if you want the optimizations unconditionally,] - [pass '-mfpu=neon' to the compiler.]);; - *) - AC_MSG_ERROR([--enable-arm-neon=${enable_arm_neon}:] - [invalid argument]) - esac]) - -# Add ARM-specific files to all builds where $host_cpu is arm ('arm*') or -# where ARM optimizations were explicitly requested. (This allows a fallback -# if a future host CPU does not match 'arm*'.) - -AM_CONDITIONAL([PNG_ARM_NEON], - [test "$enable_arm_neon" != 'no' && - case "$host_cpu" in - arm*|aarch64*) : ;; - *) test "$enable_arm_neon" != '' ;; - esac]) - -# MIPS MSA -# ======== - -AC_ARG_ENABLE([mips-msa], - AS_HELP_STRING([[[--enable-mips-msa]]], - [Enable MIPS MSA optimizations: =no/off, check, api, yes/on.] - [no/off: disable the optimizations;] - [check: use internal checking code (deprecated and poorly supported);] - [api: disable by default, enable by a call to png_set_option;] - [yes/on: turn on unconditionally.] 
- [If not specified: determined by the compiler.]), - [case "$enableval" in - no|off) - # disable the default enabling on __mips_msa systems: - AC_DEFINE([PNG_MIPS_MSA_OPT], [0], - [Disable MIPS MSA optimizations]) - # Prevent inclusion of the assembler files below: - enable_mips_msa=no ;; - check) - AC_DEFINE([PNG_MIPS_MSA_CHECK_SUPPORTED], [], - [Check for MIPS MSA support at run-time]);; - api) - AC_DEFINE([PNG_MIPS_MSA_API_SUPPORTED], [], - [Turn on MIPS MSA optimizations at run-time]);; - yes|on) - AC_DEFINE([PNG_MIPS_MSA_OPT], [2], - [Enable MIPS MSA optimizations]) - AC_MSG_WARN([--enable-mips-msa: please specify 'check' or 'api';] - [if you want the optimizations unconditionally,] - [pass '-mmsa -mfp64' to the compiler.]);; - *) - AC_MSG_ERROR([--enable-mips-msa=${enable_mips_msa}:] - [invalid argument]) - esac]) - -# Add MIPS-specific files to all builds where $host_cpu is mips ('mips*') or -# where MIPS optimizations were explicitly requested. (This allows a fallback -# if a future host CPU does not match 'mips*'.) - -AM_CONDITIONAL([PNG_MIPS_MSA], - [test "$enable_mips_msa" != 'no' && - case "$host_cpu" in - mipsel*|mips64el*) : ;; - esac]) - -# MIPS MMI -# ======== - -AC_ARG_ENABLE([mips-mmi], - AS_HELP_STRING([[[--enable-mips-mmi]]], - [Enable MIPS MMI optimizations: =no/off, check, api, yes/on.] - [no/off: disable the optimizations;] - [check: use internal checking code (deprecated and poorly supported);] - [api: disable by default, enable by a call to png_set_option;] - [yes/on: turn on unconditionally.] 
- [If not specified: determined by the compiler.]), - [case "$enableval" in - no|off) - # disable the default enabling on __mips_mmi systems: - AC_DEFINE([PNG_MIPS_MMI_OPT], [0], - [Disable MIPS MMI optimizations]) - # Prevent inclusion of the assembler files below: - enable_mips_mmi=no;; - check) - AC_DEFINE([PNG_MIPS_MMI_CHECK_SUPPORTED], [], - [Check for MIPS MMI support at run-time]);; - api) - AC_DEFINE([PNG_MIPS_MMI_API_SUPPORTED], [], - [Turn on MIPS MMI optimizations at run-time]);; - yes|on) - AC_DEFINE([PNG_MIPS_MMI_OPT], [1], - [Enable MIPS MMI optimizations]) - AC_MSG_WARN([--enable-mips-mmi: please specify 'check' or 'api';] - [if you want the optimizations unconditionally] - [pass '-mloongson-mmi -march=loongson3a' to the compiler.]);; - *) - AC_MSG_ERROR([--enable-mips-mmi=${enable_mips_mmi}:] - [invalid argument]) - esac]) - -# Add MIPS specific files to all builds where the host_cpu is mips ('mips*') or -# where MIPS optimizations were explicitly requested. (This allows a fallback -# if a future host CPU does not match 'mips*'.) - -AM_CONDITIONAL([PNG_MIPS_MMI], - [test "$enable_mips_mmi" != 'no' && - case "$host_cpu" in - mipsel*|mips64el*) : ;; - esac]) - -# INTEL SSE -# ========= - -AC_ARG_ENABLE([intel-sse], - AS_HELP_STRING([[[--enable-intel-sse]]], - [Enable Intel SSE optimizations: =no/off, yes/on.] - [no/off: disable the optimizations;] - [yes/on: enable the optimizations.] 
- [If not specified: determined by the compiler.]), - [case "$enableval" in - no|off) - # disable the default enabling: - AC_DEFINE([PNG_INTEL_SSE_OPT], [0], - [Disable Intel SSE optimizations]) - # Prevent inclusion of the assembler files below: - enable_intel_sse=no ;; - yes|on) - AC_DEFINE([PNG_INTEL_SSE_OPT], [1], - [Enable Intel SSE optimizations]);; - *) - AC_MSG_ERROR([--enable-intel-sse=${enable_intel_sse}:] - [invalid argument]) - esac]) - -# Add Intel-specific files to all builds where $host_cpu is Intel ('x86*') or -# where Intel optimizations were explicitly requested. (This allows a fallback -# if a future host CPU does not match 'x86*'.) -AM_CONDITIONAL([PNG_INTEL_SSE], - [test "$enable_intel_sse" != 'no' && - case "$host_cpu" in - i?86|x86_64) : ;; - *) test "$enable_intel_sse" != '' ;; - esac]) - -# POWERPC VSX -# =========== - -AC_ARG_ENABLE([powerpc-vsx], -AS_HELP_STRING([[[--enable-powerpc-vsx]]], - [Enable POWERPC VSX optimizations: =no/off, check, api, yes/on.] - [no/off: disable the optimizations;] - [check: use internal checking code;] - [api: disable by default, enable by a call to png_set_option;] - [yes/on: turn on unconditionally.] 
- [If not specified: determined by the compiler.]), - [case "$enableval" in - no|off) - # disable the default enabling on __ppc64__ systems: - AC_DEFINE([PNG_POWERPC_VSX_OPT], [0], - [Disable POWERPC VSX optimizations]) - # Prevent inclusion of the platform-specific files below: - enable_powerpc_vsx=no ;; - check) - AC_DEFINE([PNG_POWERPC_VSX_CHECK_SUPPORTED], [], - [Check for POWERPC VSX support at run-time]) - AC_MSG_WARN([--enable-powerpc-vsx: please see contrib/powerpc/README] - [for the list of supported systems.]);; - api) - AC_DEFINE([PNG_POWERPC_VSX_API_SUPPORTED], [], - [Turn on POWERPC VSX optimizations at run-time]);; - yes|on) - AC_DEFINE([PNG_POWERPC_VSX_OPT], [2], - [Enable POWERPC VSX optimizations]) - AC_MSG_WARN([--enable-powerpc-vsx: please specify 'check' or 'api';] - [if you want the optimizations unconditionally,] - [pass '-maltivec -mvsx' or '-mcpu=power8' to the compiler.]);; - *) - AC_MSG_ERROR([--enable-powerpc-vsx=${enable_powerpc_vsx}:] - [invalid argument]) - esac]) - -# Add PowerPC-specific files to all builds where $host_cpu is powerpc -# ('powerpc*') or where PowerPC optimizations were explicitly requested. -# (This allows a fallback if a future host CPU does not match 'powerpc*'.) - -AM_CONDITIONAL([PNG_POWERPC_VSX], - [test "$enable_powerpc_vsx" != 'no' && - case "$host_cpu" in - powerpc*|ppc64*) : ;; - esac]) - -# LOONGARCH LSX -# ============= - -AC_ARG_ENABLE([loongarch-lsx], - AS_HELP_STRING([[[--enable-loongarch-lsx]]], - [Enable LOONGARCH LSX optimizations: =no/off, yes/on:] - [no/off: disable the optimizations;] - [yes/on: turn on unconditionally.] 
- [If not specified: determined by the compiler.]), - [case "$enableval" in - no|off) - # disable the default enabling on __loongarch_simd systems: - AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [0], - [Disable LOONGARCH LSX optimizations]) - # Prevent inclusion of the assembler files below: - enable_loongarch_lsx=no;; - yes|on) - AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [1], - [Enable LOONGARCH LSX optimizations]) - ;; - *) - AC_MSG_ERROR([--enable-loongarch-lsx=${enable_loongarch_lsx}:] - [invalid argument]) - esac]) - -if test "$enable_loongarch_lsx" != "no" && - case "$host_cpu" in - loongarch*) : ;; - *) test "$enable_loongarch_lsx" != '' ;; - esac -then - compiler_support_loongarch_lsx=no - AC_MSG_CHECKING(whether to use LoongArch LSX intrinsics) - save_CFLAGS="$CFLAGS" - LSX_CFLAGS="${LSX_CFLAGS:-"-mlsx"}" - CFLAGS="$CFLAGS $LSX_CFLAGS" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ -#include -int main(){ - __m128i a, b, c; - a = __lsx_vadd_w(b, c); - return 0; -}]])],compiler_support_loongarch_lsx=yes) - CFLAGS="$save_CFLAGS" - AC_MSG_RESULT($compiler_support_loongarch_lsx) - if test "$compiler_support_loongarch_lsx" = "yes"; then - AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [1], - [Enable LOONGARCH LSX optimizations]) - else - AC_MSG_WARN([Compiler does not support loongarch LSX.]) - fi -fi - -# Add LoongArch specific files to all builds where the host_cpu is loongarch -# ('loongarch*') or where LoongArch optimizations were explicitly requested. -# (This allows a fallback if a future host CPU does not match 'loongarch*'.) 
- -AM_CONDITIONAL([PNG_LOONGARCH_LSX], - [test "$enable_loongarch_lsx" != "no" && - test "$compiler_support_loongarch_lsx" = "yes" && - case "$host_cpu" in - loongarch*) : ;; - *) test "$enable_loongarch_lsx" != '' ;; - esac]) - AC_MSG_NOTICE([[Extra options for compiler: $PNG_COPTS]]) # Config files, substituting as above diff --git a/intel/check.h b/intel/check.h new file mode 100644 index 0000000000..47ce7091f9 --- /dev/null +++ b/intel/check.h @@ -0,0 +1,35 @@ +/* intel/check.h - SSE2 optimized filter functions + * + * Copyright (c) 2018 Cosmin Truta + * Copyright (c) 2016-2017 Glenn Randers-Pehrson + * Written by Mike Klein and Matt Sarett, Google, Inc. + * Derived from arm/arm_init.c + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ +/* PNG_INTEL_SSE_IMPLEMENTATION is used in the actual implementation to select + * the correct code. + */ +#if defined(__SSE4_1__) || defined(__AVX__) + /* We are not actually using AVX, but checking for AVX is the best way we can + * detect SSE4.1 and SSSE3 on MSVC. 
+ */ +# define PNG_INTEL_SSE_IMPLEMENTATION 3 +#elif defined(__SSSE3__) +# define PNG_INTEL_SSE_IMPLEMENTATION 2 +#elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) ||\ + (defined(_M_IX86_FP) && _M_IX86_FP >= 2) +# define PNG_INTEL_SSE_IMPLEMENTATION 1 +#else +# define PNG_INTEL_SSE_IMPLEMENTATION 0 +#endif + +#if PNG_INTEL_SSE_IMPLEMENTATION > 0 +# define PNG_TARGET_CODE_IMPLEMENTATION "intel/intel_init.c" + /*PNG_TARGET_STORES_DATA*/ +# define PNG_TARGET_IMPLEMENTS_FILTERS + /*PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE*/ +# define PNG_TARGET_ROW_ALIGNMENT 16 +#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */ diff --git a/intel/filter_sse2_intrinsics.c b/intel/filter_sse2_intrinsics.c index d3c0fe9e2d..7e0dd20606 100644 --- a/intel/filter_sse2_intrinsics.c +++ b/intel/filter_sse2_intrinsics.c @@ -1,4 +1,3 @@ - /* filter_sse2_intrinsics.c - SSE2 optimized filter functions * * Copyright (c) 2018 Cosmin Truta @@ -10,13 +9,6 @@ * For conditions of distribution and use, see the disclaimer * and license in png.h */ - -#include "../pngpriv.h" - -#ifdef PNG_READ_SUPPORTED - -#if PNG_INTEL_SSE_IMPLEMENTATION > 0 - #include <immintrin.h> /* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d). @@ -49,7 +41,7 @@ static void store3(void* p, __m128i v) { memcpy(p, &tmp, 3); } -void png_read_filter_row_sub3_sse2(png_row_infop row_info, png_bytep row, +static void png_read_filter_row_sub3_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { /* The Sub filter predicts each pixel as the previous pixel, a. @@ -82,7 +74,8 @@ void png_read_filter_row_sub3_sse2(png_row_infop row_info, png_bytep row, PNG_UNUSED(prev) } -void png_read_filter_row_sub4_sse2(png_row_infop row_info, png_bytep row, +static void +png_read_filter_row_sub4_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { /* The Sub filter predicts each pixel as the previous pixel, a. 
@@ -107,7 +100,8 @@ void png_read_filter_row_sub4_sse2(png_row_infop row_info, png_bytep row, PNG_UNUSED(prev) } -void png_read_filter_row_avg3_sse2(png_row_infop row_info, png_bytep row, +static void +png_read_filter_row_avg3_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { /* The Avg filter predicts each pixel as the (truncated) average of a and b. @@ -162,7 +156,8 @@ void png_read_filter_row_avg3_sse2(png_row_infop row_info, png_bytep row, } } -void png_read_filter_row_avg4_sse2(png_row_infop row_info, png_bytep row, +static void +png_read_filter_row_avg4_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { /* The Avg filter predicts each pixel as the (truncated) average of a and b. @@ -226,7 +221,8 @@ static __m128i if_then_else(__m128i c, __m128i t, __m128i e) { #endif } -void png_read_filter_row_paeth3_sse2(png_row_infop row_info, png_bytep row, +static void +png_read_filter_row_paeth3_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { /* Paeth tries to predict pixel d using the pixel to the left of it, a, @@ -325,7 +321,8 @@ void png_read_filter_row_paeth3_sse2(png_row_infop row_info, png_bytep row, } } -void png_read_filter_row_paeth4_sse2(png_row_infop row_info, png_bytep row, +static void +png_read_filter_row_paeth4_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { /* Paeth tries to predict pixel d using the pixel to the left of it, a, @@ -386,6 +383,3 @@ void png_read_filter_row_paeth4_sse2(png_row_infop row_info, png_bytep row, rb -= 4; } } - -#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */ -#endif /* READ */ diff --git a/intel/intel_init.c b/intel/intel_init.c index 2f8168b7c4..667dd306a1 100644 --- a/intel/intel_init.c +++ b/intel/intel_init.c @@ -1,4 +1,3 @@ - /* intel_init.c - SSE2 optimized filter functions * * Copyright (c) 2018 Cosmin Truta @@ -10,13 +9,11 @@ * For conditions of distribution and use, see the disclaimer * and license in png.h */ +#define png_target_impl "intel-sse" 
-#include "../pngpriv.h" - -#ifdef PNG_READ_SUPPORTED -#if PNG_INTEL_SSE_IMPLEMENTATION > 0 +#include "filter_sse2_intrinsics.c" -void +static void png_init_filter_functions_sse2(png_structp pp, unsigned int bpp) { /* The techniques used to implement each of these filters in SSE operate on @@ -48,5 +45,4 @@ png_init_filter_functions_sse2(png_structp pp, unsigned int bpp) */ } -#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */ -#endif /* PNG_READ_SUPPORTED */ +#define png_target_init_filter_functions_impl png_init_filter_functions_sse2 diff --git a/loongarch/filter_lsx_intrinsics.c b/loongarch/filter_lsx_intrinsics.c index af6cc763a0..f1b74659b5 100644 --- a/loongarch/filter_lsx_intrinsics.c +++ b/loongarch/filter_lsx_intrinsics.c @@ -10,13 +10,6 @@ * For conditions of distribution and use, see the disclaimer * and license in png.h */ - -#include "../pngpriv.h" - -#ifdef PNG_READ_SUPPORTED - -#if PNG_LOONGARCH_LSX_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */ - #include #define LSX_LD(psrc) __lsx_vld((psrc), 0) @@ -102,8 +95,9 @@ out0 = __lsx_vadd_b(out0, _in3); \ } -void png_read_filter_row_up_lsx(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_up_lsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { size_t n = row_info->rowbytes; png_bytep rp = row; @@ -165,8 +159,9 @@ void png_read_filter_row_up_lsx(png_row_infop row_info, png_bytep row, } } -void png_read_filter_row_sub3_lsx(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_sub3_lsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { size_t n = row_info->rowbytes; png_uint_32 tmp; @@ -199,8 +194,9 @@ void png_read_filter_row_sub3_lsx(png_row_infop row_info, png_bytep row, } } -void png_read_filter_row_sub4_lsx(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_sub4_lsx(png_row_infop row_info, png_bytep row, + 
png_const_bytep prev_row) { size_t n = row_info->rowbytes; __m128i vec_0, vec_1; @@ -222,8 +218,9 @@ void png_read_filter_row_sub4_lsx(png_row_infop row_info, png_bytep row, } } -void png_read_filter_row_avg3_lsx(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_avg3_lsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { size_t n = row_info->rowbytes; png_bytep nxt = row; @@ -275,8 +272,9 @@ void png_read_filter_row_avg3_lsx(png_row_infop row_info, png_bytep row, } } -void png_read_filter_row_avg4_lsx(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_avg4_lsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { size_t n = row_info->rowbytes; __m128i vec_0, vec_1, vec_2; @@ -306,9 +304,9 @@ void png_read_filter_row_avg4_lsx(png_row_infop row_info, png_bytep row, } } -void png_read_filter_row_paeth3_lsx(png_row_infop row_info, - png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_paeth3_lsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { size_t n = row_info->rowbytes; png_bytep nxt = row; @@ -371,9 +369,9 @@ void png_read_filter_row_paeth3_lsx(png_row_infop row_info, } } -void png_read_filter_row_paeth4_lsx(png_row_infop row_info, - png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_paeth4_lsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { size_t n = row_info->rowbytes; __m128i vec_a, vec_b, vec_c, vec_d; diff --git a/loongarch/loongarch_lsx_init.c b/loongarch/loongarch_lsx_init.c index 2c80fe81b6..e98c53bb33 100644 --- a/loongarch/loongarch_lsx_init.c +++ b/loongarch/loongarch_lsx_init.c @@ -8,14 +8,13 @@ * For conditions of distribution and use, see the disclaimer * and license in png.h */ - -#include "../pngpriv.h" - -#ifdef PNG_READ_SUPPORTED -#if PNG_LOONGARCH_LSX_IMPLEMENTATION == 1 +#ifdef __loongarch_sx +#define png_target_impl 
"loongarch-sx" #include +#include "filter_lsx_intrinsics.c" + #define LA_HWCAP_LSX (1<<4) static int png_has_lsx(void) { @@ -28,21 +27,9 @@ static int png_has_lsx(void) return 0; } -void +static void png_init_filter_functions_lsx(png_structp pp, unsigned int bpp) { - /* IMPORTANT: any new external functions used here must be declared using - * PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the - * 'prefix' option to configure works: - * - * ./configure --with-libpng-prefix=foobar_ - * - * Verify you have got this right by running the above command, doing a build - * and examining pngprefix.h; it must contain a #define for every external - * function you add. (Notice that this happens automatically for the - * initialization function.) - */ - if (png_has_lsx()) { pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_lsx; @@ -61,5 +48,6 @@ png_init_filter_functions_lsx(png_structp pp, unsigned int bpp) } } -#endif /* PNG_LOONGARCH_LSX_IMPLEMENTATION == 1 */ -#endif /* PNG_READ_SUPPORTED */ +#define png_target_init_filter_functions png_init_filter_functions_lsx + +#endif /* __loongarch_sx */ diff --git a/mips/check.h b/mips/check.h new file mode 100644 index 0000000000..10fd3addb7 --- /dev/null +++ b/mips/check.h @@ -0,0 +1,54 @@ +/* mips/check.h - MSA optimised filter functions + * + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 2016 Glenn Randers-Pehrson + * Written by Mandar Sahastrabuddhe, 2016 + * Updated by guxiwei, 2023 + * + * This code is released under the libpng license. 
+ * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ +#ifdef PNG_READ_SUPPORTED + +#if defined(__mips_msa) && (__mips_isa_rev >= 5) +# ifndef PNG_MIPS_MSA_IMPLEMENTATION +# if defined(__mips_msa) +# if defined(__clang__) +# elif defined(__GNUC__) +# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 7) +# define PNG_MIPS_MSA_IMPLEMENTATION 2 +# endif /* no GNUC support */ +# endif /* __GNUC__ */ +# else /* !defined __mips_msa */ +# define PNG_MIPS_MSA_IMPLEMENTATION 2 +# endif /* __mips_msa */ +# endif /* !PNG_MIPS_MSA_IMPLEMENTATION */ + +# ifndef PNG_MIPS_MSA_IMPLEMENTATION +# define PNG_MIPS_MSA_IMPLEMENTATION 1 +# endif +#else +# define PNG_MIPS_MSA_IMPLEMENTATION 0 +#endif /* __mips_msa && __mips_isa_rev >= 5 */ + +#if defined(__mips_loongson_mmi) && (_MIPS_SIM == _ABI64) +# ifndef PNG_MIPS_MMI_IMPLEMENTATION +# if defined(__mips_loongson_mmi) && (_MIPS_SIM == _ABI64) +# define PNG_MIPS_MMI_IMPLEMENTATION 2 +# else /* !defined __mips_loongson_mmi || _MIPS_SIM != _ABI64 */ +# define PNG_MIPS_MMI_IMPLEMENTATION 0 +# endif /* __mips_loongson_mmi && _MIPS_SIM == _ABI64 */ +# endif /* !PNG_MIPS_MMI_IMPLEMENTATION */ +#else +# define PNG_MIPS_MMI_IMPLEMENTATION 0 +#endif /* __mips_loongson_mmi && _MIPS_SIM == _ABI64 */ + +#if PNG_MIPS_MSA_IMPLEMENTATION == 1 || PNG_MIPS_MMI_IMPLEMENTATION > 0 +# define PNG_TARGET_CODE_IMPLEMENTATION "mips/mips_init.c" + /*PNG_TARGET_STORES_DATA*/ +# define PNG_TARGET_IMPLEMENTS_FILTERS + /*PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE*/ +# define PNG_TARGET_ROW_ALIGNMENT 16 +#endif /* MIPS MSA or MMI */ +#endif /* READ */ diff --git a/mips/filter_mmi_inline_assembly.c b/mips/filter_mmi_inline_assembly.c index b330a46538..3edb71d9fc 100644 --- a/mips/filter_mmi_inline_assembly.c +++ b/mips/filter_mmi_inline_assembly.c @@ -8,12 +8,6 @@ * and license in png.h */ -#include "../pngpriv.h" - -#ifdef PNG_READ_SUPPORTED - -#if PNG_MIPS_MMI_IMPLEMENTATION == 2 /* Inline Assembly */ - /* Functions in this 
file look at most 3 pixels (a,b,c) to predict the 4th (d). * They're positioned like this: * prev: c b @@ -22,8 +16,9 @@ * whichever of a, b, or c is closest to p=a+b-c. */ -void png_read_filter_row_up_mmi(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_up_mmi(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { int istop = row_info->rowbytes; double rp,pp; @@ -45,7 +40,8 @@ void png_read_filter_row_up_mmi(png_row_infop row_info, png_bytep row, ); } -void png_read_filter_row_sub3_mmi(png_row_infop row_info, png_bytep row, +static void +png_read_filter_row_sub3_mmi(png_row_infop row_info, png_bytep row, png_const_bytep prev) { int istop = row_info->rowbytes; @@ -104,7 +100,8 @@ void png_read_filter_row_sub3_mmi(png_row_infop row_info, png_bytep row, PNG_UNUSED(prev) } -void png_read_filter_row_sub4_mmi(png_row_infop row_info, png_bytep row, +static void +png_read_filter_row_sub4_mmi(png_row_infop row_info, png_bytep row, png_const_bytep prev) { /* The Sub filter predicts each pixel as the previous pixel, a. 
@@ -132,7 +129,8 @@ void png_read_filter_row_sub4_mmi(png_row_infop row_info, png_bytep row, PNG_UNUSED(prev) } -void png_read_filter_row_avg3_mmi(png_row_infop row_info, png_bytep row, +static void +png_read_filter_row_avg3_mmi(png_row_infop row_info, png_bytep row, png_const_bytep prev) { int istop = row_info->rowbytes; @@ -224,7 +222,8 @@ void png_read_filter_row_avg3_mmi(png_row_infop row_info, png_bytep row, ); } -void png_read_filter_row_avg4_mmi(png_row_infop row_info, png_bytep row, +static void +png_read_filter_row_avg4_mmi(png_row_infop row_info, png_bytep row, png_const_bytep prev) { int istop = row_info->rowbytes; @@ -260,7 +259,8 @@ void png_read_filter_row_avg4_mmi(png_row_infop row_info, png_bytep row, ); } -void png_read_filter_row_paeth3_mmi(png_row_infop row_info, png_bytep row, +static void +png_read_filter_row_paeth3_mmi(png_row_infop row_info, png_bytep row, png_const_bytep prev) { /* Paeth tries to predict pixel d using the pixel to the left of it, a, @@ -448,7 +448,8 @@ void png_read_filter_row_paeth3_mmi(png_row_infop row_info, png_bytep row, ); } -void png_read_filter_row_paeth4_mmi(png_row_infop row_info, png_bytep row, +static void +png_read_filter_row_paeth4_mmi(png_row_infop row_info, png_bytep row, png_const_bytep prev) { /* Paeth tries to predict pixel d using the pixel to the left of it, a, @@ -520,6 +521,3 @@ void png_read_filter_row_paeth4_mmi(png_row_infop row_info, png_bytep row, : "memory" ); } - -#endif /* PNG_MIPS_MMI_IMPLEMENTATION > 0 */ -#endif /* READ */ diff --git a/mips/filter_msa_intrinsics.c b/mips/filter_msa_intrinsics.c index 1b734f4d9a..0bf080288e 100644 --- a/mips/filter_msa_intrinsics.c +++ b/mips/filter_msa_intrinsics.c @@ -1,4 +1,3 @@ - /* filter_msa_intrinsics.c - MSA optimised filter functions * * Copyright (c) 2018-2024 Cosmin Truta @@ -9,15 +8,6 @@ * For conditions of distribution and use, see the disclaimer * and license in png.h */ - -#include -#include "../pngpriv.h" - -#ifdef PNG_READ_SUPPORTED - -/* 
This code requires -mfpu=msa on the command line: */ -#if PNG_MIPS_MSA_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */ - #include #include @@ -364,8 +354,9 @@ out0 += inp4; \ } -void png_read_filter_row_up_msa(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_up_msa(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { size_t i, cnt, cnt16, cnt32; size_t istop = row_info->rowbytes; @@ -455,8 +446,9 @@ void png_read_filter_row_up_msa(png_row_infop row_info, png_bytep row, } } -void png_read_filter_row_sub4_msa(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_sub4_msa(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { size_t count; size_t istop = row_info->rowbytes; @@ -494,8 +486,9 @@ void png_read_filter_row_sub4_msa(png_row_infop row_info, png_bytep row, } } -void png_read_filter_row_sub3_msa(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_sub3_msa(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { size_t count; size_t istop = row_info->rowbytes; @@ -539,8 +532,9 @@ void png_read_filter_row_sub3_msa(png_row_infop row_info, png_bytep row, } } -void png_read_filter_row_avg4_msa(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_avg4_msa(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { size_t i; png_bytep src = row; @@ -590,8 +584,9 @@ void png_read_filter_row_avg4_msa(png_row_infop row_info, png_bytep row, } } -void png_read_filter_row_avg3_msa(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_avg3_msa(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { size_t i; png_bytep src = row; @@ -651,9 +646,9 @@ void png_read_filter_row_avg3_msa(png_row_infop row_info, png_bytep row, } } -void 
png_read_filter_row_paeth4_msa(png_row_infop row_info, - png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_paeth4_msa(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { int32_t count, rp_end; png_bytep nxt; @@ -722,9 +717,9 @@ void png_read_filter_row_paeth4_msa(png_row_infop row_info, } } -void png_read_filter_row_paeth3_msa(png_row_infop row_info, - png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_paeth3_msa(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { int32_t count, rp_end; png_bytep nxt; @@ -802,7 +797,3 @@ void png_read_filter_row_paeth3_msa(png_row_infop row_info, nxt += 4; } } - -#endif /* PNG_MIPS_MSA_OPT > 0 */ -#endif /* PNG_MIPS_MSA_IMPLEMENTATION == 1 (intrinsics) */ -#endif /* READ */ diff --git a/mips/mips_init.c b/mips/mips_init.c index 5c6fa1dbf1..f80680b5f4 100644 --- a/mips/mips_init.c +++ b/mips/mips_init.c @@ -1,4 +1,3 @@ - /* mips_init.c - MSA optimised filter functions * * Copyright (c) 2018-2024 Cosmin Truta @@ -10,195 +9,74 @@ * For conditions of distribution and use, see the disclaimer * and license in png.h */ - -/* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are - * called. - */ -#define _POSIX_SOURCE 1 - -#include -#include "../pngpriv.h" - -#ifdef PNG_READ_SUPPORTED - -#if PNG_MIPS_MSA_IMPLEMENTATION == 1 || PNG_MIPS_MMI_IMPLEMENTATION > 0 - -#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED /* Do MIPS MSA run-time checks */ -/* WARNING: it is strongly recommended that you do not build libpng with - * run-time checks for CPU features if at all possible. In the case of the MIPS - * MSA instructions there is no processor-specific way of detecting the - * presence of the required support, therefore run-time detection is extremely - * OS specific. +/* MIPS supports two optimizations: MMI and MSA. When both are available the + * appropriate optimization is chosen at runtime using the png_set_option + * settings. 
* - * You may set the macro PNG_MIPS_MSA_FILE to the file name of file containing - * a fragment of C source code which defines the png_have_msa function. There - * are a number of implementations in contrib/mips-msa, but the only one that - * has partial support is contrib/mips-msa/linux.c - a generic Linux - * implementation which reads /proc/cpufino. + * NOTE: see also the separate loongson code... */ -#ifndef PNG_MIPS_MSA_FILE -# ifdef __linux__ -# define PNG_MIPS_MSA_FILE "contrib/mips-msa/linux.c" -# endif -#endif - -#ifdef PNG_MIPS_MSA_FILE - -#include /* for sig_atomic_t */ -static int png_have_msa(png_structp png_ptr); -#include PNG_MIPS_MSA_FILE - -#else /* PNG_MIPS_MSA_FILE */ -# error "PNG_MIPS_MSA_FILE undefined: no support for run-time MIPS MSA checks" -#endif /* PNG_MIPS_MSA_FILE */ -#endif /* PNG_MIPS_MSA_CHECK_SUPPORTED */ - -#ifdef PNG_MIPS_MMI_CHECK_SUPPORTED /* Do MIPS MMI run-times checks */ -#ifndef PNG_MIPS_MMI_FILE -# ifdef __linux__ -# define PNG_MIPS_MMI_FILE "contrib/mips-mmi/linux.c" -# endif +#if PNG_MIPS_MSA_IMPLEMENTATION == 1 +# include "filter_msa_intrinsics.c" #endif - -#ifdef PNG_MIPS_MMI_FILE - -#include /* for sig_atomic_t */ -static int png_have_mmi(); -#include PNG_MIPS_MMI_FILE - -#else /* PNG_MIPS_MMI_FILE */ -# error "PNG_MIPS_MMI_FILE undefined: no support for run-time MIPS MMI checks" -#endif /* PNG_MIPS_MMI_FILE */ -#endif /* PNG_MIPS_MMI_CHECK_SUPPORTED*/ - -#ifndef PNG_ALIGNED_MEMORY_SUPPORTED -# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED" +#if PNG_MIPS_MMI_IMPLEMENTATION > 0 +# include "filter_mmi_inline_assembly.c" #endif -/* MIPS supports two optimizations: MMI and MSA. 
The appropriate - * optimization is chosen at runtime - */ -void +static void png_init_filter_functions_mips(png_structp pp, unsigned int bpp) { -#if PNG_MIPS_MMI_IMPLEMENTATION > 0 -#ifdef PNG_MIPS_MMI_API_SUPPORTED - switch ((pp->options >> PNG_MIPS_MMI) & 3) - { - case PNG_OPTION_UNSET: -#endif /* PNG_MIPS_MMI_API_SUPPORTED */ -#ifdef PNG_MIPS_MMI_CHECK_SUPPORTED +# if PNG_MIPS_MMI_IMPLEMENTATION > 0 + /* Check the option if MSA is also supported: */ +# if PNG_MIPS_MSA_IMPLEMENTATION == 1 +# define png_target_impl "mips-msa+mmi" + /* NOTE: if this is false the MMI code below will not be executed. TODO(review): the MSA filters are in the #else branch of the MMI test, so they are not installed in that case either - confirm this is intended. */ + if (((pp->options >> PNG_MIPS_MMI) & 3) == PNG_OPTION_ON) +# else +# define png_target_impl "mips-mmi" +# endif + { + /* This is the MMI implementation: */ + pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_mmi; + if (bpp == 3) { - static volatile sig_atomic_t no_mmi = -1; /* not checked */ - - if (no_mmi < 0) - no_mmi = !png_have_mmi(); - - if (no_mmi) - goto MIPS_MSA_INIT; + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = + png_read_filter_row_sub3_mmi; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = + png_read_filter_row_avg3_mmi; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth3_mmi; } -#ifdef PNG_MIPS_MMI_API_SUPPORTED - break; -#endif -#endif /* PNG_MIPS_MMI_CHECK_SUPPORTED */ - -#ifdef PNG_MIPS_MMI_API_SUPPORTED - default: /* OFF or INVALID */ - goto MIPS_MSA_INIT; - - case PNG_OPTION_ON: - /* Option turned on */ - break; - } -#endif - pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_mmi; - if (bpp == 3) - { - pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_mmi; - pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_mmi; - pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = - png_read_filter_row_paeth3_mmi; - } - else if (bpp == 4) - { - 
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_mmi; - pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_mmi; - 
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = - png_read_filter_row_paeth4_mmi; - } -#endif /* PNG_MIPS_MMI_IMPLEMENTATION > 0 */ - -MIPS_MSA_INIT: -#if PNG_MIPS_MSA_IMPLEMENTATION == 1 - /* The switch statement is compiled in for MIPS_MSA_API, the call to - * png_have_msa is compiled in for MIPS_MSA_CHECK. If both are defined - * the check is only performed if the API has not set the MSA option on - * or off explicitly. In this case the check controls what happens. - */ - -#ifdef PNG_MIPS_MSA_API_SUPPORTED - switch ((pp->options >> PNG_MIPS_MSA) & 3) - { - case PNG_OPTION_UNSET: - /* Allow the run-time check to execute if it has been enabled - - * thus both API and CHECK can be turned on. If it isn't supported - * this case will fall through to the 'default' below, which just - * returns. - */ -#endif /* PNG_MIPS_MSA_API_SUPPORTED */ -#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED + else if (bpp == 4) { - static volatile sig_atomic_t no_msa = -1; /* not checked */ - - if (no_msa < 0) - no_msa = !png_have_msa(pp); - - if (no_msa) - return; + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = + png_read_filter_row_sub4_mmi; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = + png_read_filter_row_avg4_mmi; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth4_mmi; } -#ifdef PNG_MIPS_MSA_API_SUPPORTED - break; -#endif -#endif /* PNG_MIPS_MSA_CHECK_SUPPORTED */ - -#ifdef PNG_MIPS_MSA_API_SUPPORTED - default: /* OFF or INVALID */ return; - - case PNG_OPTION_ON: - /* Option turned on */ - break; - } -#endif - - /* IMPORTANT: any new external functions used here must be declared using - * PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the - * 'prefix' option to configure works: - * - * ./configure --with-libpng-prefix=foobar_ - * - * Verify you have got this right by running the above command, doing a build - * and examining pngprefix.h; it must contain a #define for every external - * function you add. 
(Notice that this happens automatically for the - * initialization function.) - */ - pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_msa; - - if (bpp == 3) - { - pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_msa; - pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_msa; - pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth3_msa; - } - - else if (bpp == 4) - { - pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_msa; - pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_msa; - pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_msa; - } -#endif /* PNG_MIPS_MSA_IMPLEMENTATION == 1 */ - return; + } +# else /* !(PNG_MIPS_MMI_IMPLEMENTATION > 0) */ +# define png_target_impl "mips-msa" + pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_msa; + + if (bpp == 3) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_msa; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_msa; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth3_msa; + } + + else if (bpp == 4) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_msa; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_msa; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth4_msa; + } +# endif /* PNG_MIPS_MSA_IMPLEMENTATION == 1 */ } -#endif /* PNG_MIPS_MSA_IMPLEMENTATION == 1 || PNG_MIPS_MMI_IMPLEMENTATION > 0 */ -#endif /* READ */ + +#define png_target_init_filter_functions_impl png_init_filter_functions_mips diff --git a/png.c b/png.c index 84e03b5fd7..063c34b0da 100644 --- a/png.c +++ b/png.c @@ -2287,12 +2287,10 @@ png_compare_ICC_profile_with_sRGB(png_const_structrp png_ptr, #endif unsigned int i; -#ifdef PNG_SET_OPTION_SUPPORTED /* First see if PNG_SKIP_sRGB_CHECK_PROFILE has been set to "on" */ if (((png_ptr->options >> PNG_SKIP_sRGB_CHECK_PROFILE) & 3) == PNG_OPTION_ON) 
return 0; -#endif for (i=0; i < (sizeof png_sRGB_checks) / (sizeof png_sRGB_checks[0]); ++i) { @@ -4265,7 +4263,6 @@ png_build_gamma_table(png_structrp png_ptr, int bit_depth) #endif /* READ_GAMMA */ /* HARDWARE OR SOFTWARE OPTION SUPPORT */ -#ifdef PNG_SET_OPTION_SUPPORTED int PNGAPI png_set_option(png_structrp png_ptr, int option, int onoff) { @@ -4283,7 +4280,6 @@ png_set_option(png_structrp png_ptr, int option, int onoff) return PNG_OPTION_INVALID; } -#endif /* sRGB support */ #if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\ diff --git a/png.h b/png.h index b71bd7064a..c88e8e5c55 100644 --- a/png.h +++ b/png.h @@ -3182,49 +3182,50 @@ PNG_EXPORT(245, int, png_image_write_to_memory, (png_imagep image, void *memory, * option and 'onoff' is 0 (off) or non-0 (on). The value returned is given * by the PNG_OPTION_ defines below. * - * HARDWARE: normally hardware capabilities, such as the Intel SSE instructions, - * are detected at run time, however sometimes it may be impossible - * to do this in user mode, in which case it is necessary to discover - * the capabilities in an OS specific way. Such capabilities are - * listed here when libpng has support for them and must be turned - * ON by the application if present. + * HARDWARE: [[changed in libpng 1.8]] + * Hardware options are now controlled globally to be 'on' or 'off'. + * For backward compatibility the original options are defined as + * the 'new' hardware option. libpng can be compiled without + * hardware support (check PNG_TARGET_SPECIFIC_CODE_SUPPORTED and + * the documentation in pngtarget.h). * * SOFTWARE: sometimes software optimizations actually result in performance * decrease on some architectures or systems, or with some sets of * PNG images. 'Software' options allow such optimizations to be * selected at run time. + * + * The initial setting for HARDWARE is determined by whether or not any + * hardware-specific optimizations are available; the setting will be "ON" if + * so, otherwise it will be UNSET. 
+ * + * the option starts off UNSET and this is treated as OFF. + */ -#ifdef PNG_SET_OPTION_SUPPORTED -#ifdef PNG_ARM_NEON_API_SUPPORTED -# define PNG_ARM_NEON 0 /* HARDWARE: ARM Neon SIMD instructions supported */ -#endif -#define PNG_MAXIMUM_INFLATE_WINDOW 2 /* SOFTWARE: force maximum window */ -#define PNG_SKIP_sRGB_CHECK_PROFILE 4 /* SOFTWARE: Check ICC profile for sRGB */ -#ifdef PNG_MIPS_MSA_API_SUPPORTED -# define PNG_MIPS_MSA 6 /* HARDWARE: MIPS Msa SIMD instructions supported */ -#endif +#define PNG_SET_OPTION_SUPPORTED +#define PNG_TARGET_SPECIFIC_CODE 0 /* HARDWARE: turn on or off cpu specific code */ +#define PNG_ARM_NEON 0 /* HARDWARE: compatibility */ +#define PNG_MIPS_MSA 0 /* HARDWARE: compatibility */ +#define PNG_POWERPC_VSX 0 /* HARDWARE: compatibility */ +#define PNG_MIPS_MMI 2/* HARDWARE: MIPS: choose MMI over MSA */ +#define PNG_MAXIMUM_INFLATE_WINDOW 4 /* SOFTWARE: force maximum window */ +#define PNG_SKIP_sRGB_CHECK_PROFILE 6 /* SOFTWARE: Check ICC profile for sRGB */ + #ifdef PNG_DISABLE_ADLER32_CHECK_SUPPORTED + /* This has to be disabled in some builds because of the lack of + * functionality in zlib. Check the _SUPPORTED macro. 
+ */ # define PNG_IGNORE_ADLER32 8 /* SOFTWARE: disable Adler32 check on IDAT */ #endif -#ifdef PNG_POWERPC_VSX_API_SUPPORTED -# define PNG_POWERPC_VSX 10 /* HARDWARE: PowerPC VSX SIMD instructions - * supported */ -#endif -#ifdef PNG_MIPS_MMI_API_SUPPORTED -# define PNG_MIPS_MMI 12 /* HARDWARE: MIPS MMI SIMD instructions supported */ -#endif -#define PNG_OPTION_NEXT 14 /* Next option - numbers must be even */ +#define PNG_OPTION_NEXT 10 /* Return values: NOTE: there are four values and 'off' is *not* zero */ -#define PNG_OPTION_UNSET 0 /* Unset - defaults to off */ +#define PNG_OPTION_UNSET 0 /* Unset - defaults as above */ #define PNG_OPTION_INVALID 1 /* Option number out of range */ #define PNG_OPTION_OFF 2 #define PNG_OPTION_ON 3 PNG_EXPORT(244, int, png_set_option, (png_structrp png_ptr, int option, int onoff)); -#endif /* SET_OPTION */ /******************************************************************************* * END OF HARDWARE AND SOFTWARE OPTIONS diff --git a/pngpriv.h b/pngpriv.h index b59084e7eb..9272455fbe 100644 --- a/pngpriv.h +++ b/pngpriv.h @@ -71,6 +71,9 @@ #ifndef PNGLCONF_H # include "pnglibconf.h" #endif +#ifndef PNGTARGET_H +# include "pngtarget.h" +#endif /* Local renames may change non-exported API functions from png.h */ #if defined(PNG_PREFIX) && !defined(PNGPREFIX_H) @@ -88,198 +91,6 @@ # endif #endif -/* Compile time options. - * ===================== - * In a multi-arch build the compiler may compile the code several times for the - * same object module, producing different binaries for different architectures. - * When this happens configure-time setting of the target host options cannot be - * done and this interferes with the handling of the ARM NEON optimizations, and - * possibly other similar optimizations. Put additional tests here; in general - * this is needed when the same option can be changed at both compile time and - * run time depending on the target OS (i.e. iOS vs Android.) 
- * - * NOTE: symbol prefixing does not pass $(CFLAGS) to the preprocessor, because - * this is not possible with certain compilers (Oracle SUN OS CC), as a result - * it is necessary to ensure that all extern functions that *might* be used - * regardless of $(CFLAGS) get declared in this file. The test on __ARM_NEON__ - * below is one example of this behavior because it is controlled by the - * presence or not of -mfpu=neon on the GCC command line, it is possible to do - * this in $(CC), e.g. "CC=gcc -mfpu=neon", but people who build libpng rarely - * do this. - */ -#ifndef PNG_ARM_NEON_OPT - /* ARM NEON optimizations are being controlled by the compiler settings, - * typically the target FPU. If the FPU has been set to NEON (-mfpu=neon - * with GCC) then the compiler will define __ARM_NEON__ and we can rely - * unconditionally on NEON instructions not crashing, otherwise we must - * disable use of NEON instructions. - * - * NOTE: at present these optimizations depend on 'ALIGNED_MEMORY', so they - * can only be turned on automatically if that is supported too. If - * PNG_ARM_NEON_OPT is set in CPPFLAGS (to >0) then arm/arm_init.c will fail - * to compile with an appropriate #error if ALIGNED_MEMORY has been turned - * off. - * - * Note that gcc-4.9 defines __ARM_NEON instead of the deprecated - * __ARM_NEON__, so we check both variants. - * - * To disable ARM_NEON optimizations entirely, and skip compiling the - * associated assembler code, pass --enable-arm-neon=no to configure - * or put -DPNG_ARM_NEON_OPT=0 in CPPFLAGS. - */ -# if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && \ - defined(PNG_ALIGNED_MEMORY_SUPPORTED) -# define PNG_ARM_NEON_OPT 2 -# else -# define PNG_ARM_NEON_OPT 0 -# endif -#endif - -#if PNG_ARM_NEON_OPT > 0 - /* NEON optimizations are to be at least considered by libpng, so enable the - * callbacks to do this. 
- */ -# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_neon -# ifndef PNG_ARM_NEON_IMPLEMENTATION - /* Use the intrinsics code by default. */ -# define PNG_ARM_NEON_IMPLEMENTATION 1 -# endif -#else /* PNG_ARM_NEON_OPT == 0 */ -# define PNG_ARM_NEON_IMPLEMENTATION 0 -#endif /* PNG_ARM_NEON_OPT > 0 */ - -#ifndef PNG_MIPS_MSA_OPT -# if defined(__mips_msa) && (__mips_isa_rev >= 5) && \ - defined(PNG_ALIGNED_MEMORY_SUPPORTED) -# define PNG_MIPS_MSA_OPT 2 -# else -# define PNG_MIPS_MSA_OPT 0 -# endif -#endif - -#ifndef PNG_MIPS_MMI_OPT -# ifdef PNG_MIPS_MMI -# if defined(__mips_loongson_mmi) && (_MIPS_SIM == _ABI64) && \ - defined(PNG_ALIGNED_MEMORY_SUPPORTED) -# define PNG_MIPS_MMI_OPT 1 -# else -# define PNG_MIPS_MMI_OPT 0 -# endif -# else -# define PNG_MIPS_MMI_OPT 0 -# endif -#endif - -#ifndef PNG_POWERPC_VSX_OPT -# if defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) -# define PNG_POWERPC_VSX_OPT 2 -# else -# define PNG_POWERPC_VSX_OPT 0 -# endif -#endif - -#ifndef PNG_LOONGARCH_LSX_OPT -# if defined(__loongarch_sx) -# define PNG_LOONGARCH_LSX_OPT 1 -# else -# define PNG_LOONGARCH_LSX_OPT 0 -# endif -#endif - -#ifndef PNG_INTEL_SSE_OPT -# ifdef PNG_INTEL_SSE - /* Only check for SSE if the build configuration has been modified to - * enable SSE optimizations. This means that these optimizations will - * be off by default. See contrib/intel for more details. - */ -# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \ - defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ - (defined(_M_IX86_FP) && _M_IX86_FP >= 2) -# define PNG_INTEL_SSE_OPT 1 -# else -# define PNG_INTEL_SSE_OPT 0 -# endif -# else -# define PNG_INTEL_SSE_OPT 0 -# endif -#endif - -#if PNG_INTEL_SSE_OPT > 0 -# ifndef PNG_INTEL_SSE_IMPLEMENTATION -# if defined(__SSE4_1__) || defined(__AVX__) - /* We are not actually using AVX, but checking for AVX is the best - way we can detect SSE4.1 and SSSE3 on MSVC. 
- */ -# define PNG_INTEL_SSE_IMPLEMENTATION 3 -# elif defined(__SSSE3__) -# define PNG_INTEL_SSE_IMPLEMENTATION 2 -# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ - (defined(_M_IX86_FP) && _M_IX86_FP >= 2) -# define PNG_INTEL_SSE_IMPLEMENTATION 1 -# else -# define PNG_INTEL_SSE_IMPLEMENTATION 0 -# endif -# endif - -# if PNG_INTEL_SSE_IMPLEMENTATION > 0 -# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2 -# endif -#else -# define PNG_INTEL_SSE_IMPLEMENTATION 0 -#endif - -#if PNG_MIPS_MSA_OPT > 0 -# ifndef PNG_MIPS_MSA_IMPLEMENTATION -# if defined(__mips_msa) -# if defined(__clang__) -# elif defined(__GNUC__) -# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 7) -# define PNG_MIPS_MSA_IMPLEMENTATION 2 -# endif /* no GNUC support */ -# endif /* __GNUC__ */ -# else /* !defined __mips_msa */ -# define PNG_MIPS_MSA_IMPLEMENTATION 2 -# endif /* __mips_msa */ -# endif /* !PNG_MIPS_MSA_IMPLEMENTATION */ - -# ifndef PNG_MIPS_MSA_IMPLEMENTATION -# define PNG_MIPS_MSA_IMPLEMENTATION 1 -# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_mips -# endif -#else -# define PNG_MIPS_MSA_IMPLEMENTATION 0 -#endif /* PNG_MIPS_MSA_OPT > 0 */ - -#if PNG_MIPS_MMI_OPT > 0 -# ifndef PNG_MIPS_MMI_IMPLEMENTATION -# if defined(__mips_loongson_mmi) && (_MIPS_SIM == _ABI64) -# define PNG_MIPS_MMI_IMPLEMENTATION 2 -# else /* !defined __mips_loongson_mmi || _MIPS_SIM != _ABI64 */ -# define PNG_MIPS_MMI_IMPLEMENTATION 0 -# endif /* __mips_loongson_mmi && _MIPS_SIM == _ABI64 */ -# endif /* !PNG_MIPS_MMI_IMPLEMENTATION */ - -# if PNG_MIPS_MMI_IMPLEMENTATION > 0 -# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_mips -# endif -#else -# define PNG_MIPS_MMI_IMPLEMENTATION 0 -#endif /* PNG_MIPS_MMI_OPT > 0 */ - -#if PNG_POWERPC_VSX_OPT > 0 -# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_vsx -# define PNG_POWERPC_VSX_IMPLEMENTATION 1 -#else -# define PNG_POWERPC_VSX_IMPLEMENTATION 0 -#endif - -#if PNG_LOONGARCH_LSX_OPT > 0 -# define 
PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_lsx -# define PNG_LOONGARCH_LSX_IMPLEMENTATION 1 -#else -# define PNG_LOONGARCH_LSX_IMPLEMENTATION 0 -#endif - /* Is this a build of a DLL where compilation of the object modules requires * different preprocessor settings to those required for a simple library? If * so PNG_BUILD_DLL must be set. @@ -1282,105 +1093,37 @@ PNG_INTERNAL_FUNCTION(void,png_do_write_interlace,(png_row_infop row_info, PNG_INTERNAL_FUNCTION(void,png_read_filter_row,(png_structrp pp, png_row_infop row_info, png_bytep row, png_const_bytep prev_row, int filter),PNG_EMPTY); -#if PNG_ARM_NEON_OPT > 0 -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_neon,(png_row_infop row_info, - png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_neon,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_neon,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_neon,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_neon,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_neon,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_neon,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -#endif - -#if PNG_MIPS_MSA_IMPLEMENTATION == 1 -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_msa,(png_row_infop row_info, - png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_msa,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_msa,(png_row_infop - row_info, 
png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_msa,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_msa,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_msa,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_msa,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -#endif - -#if PNG_MIPS_MMI_IMPLEMENTATION > 0 -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_mmi,(png_row_infop row_info, - png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_mmi,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_mmi,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_mmi,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_mmi,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_mmi,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_mmi,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -#endif - -#if PNG_POWERPC_VSX_OPT > 0 -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_vsx,(png_row_infop row_info, - png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vsx,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vsx,(png_row_infop - 
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_vsx,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_vsx,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_vsx,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_vsx,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -#endif - -#if PNG_INTEL_SSE_IMPLEMENTATION > 0 -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -#endif - -#if PNG_LOONGARCH_LSX_IMPLEMENTATION == 1 -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_lsx,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_lsx,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_lsx,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); 
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_lsx,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_lsx,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_lsx,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_lsx,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -#endif +#ifdef PNG_TARGET_CODE_IMPLEMENTATION +/* png_struct::target_state contains a cache of these flags and updates + * it as required during read. The hardware implementation may also do + * this, for example if it determines that hardware optimization is not + * available for this image. + */ +#define png_target_filters 1 /* MASK: hardware support for filters */ +#define png_target_palette 2 /* MASK: hardware support for palettes */ + +PNG_INTERNAL_FUNCTION(void,png_target_init,(png_structrp),PNG_EMPTY); + /* Initialize png_struct::target_state if required. */ + +PNG_INTERNAL_FUNCTION(void,png_target_free_data,(png_structrp),PNG_EMPTY); + /* Free any data allocated in the png_struct::target_data. + */ + +PNG_INTERNAL_FUNCTION(void, png_target_init_filter_functions, + (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); + /* The filter function initializer that selects the specific hardware + * implementation. Called once before the first row needs to be defiltered. + */ + +PNG_INTERNAL_FUNCTION(void, png_target_init_palette_support, (png_structrp), + PNG_EMPTY); +PNG_INTERNAL_FUNCTION(int, png_target_do_expand_palette, (png_structrp, + png_row_infop, png_const_bytep, const png_bytepp, const png_bytepp), + PNG_EMPTY); + /* Two functions to set up and execute palette expansion. The 'init' + * must succeed but then the 'do_expand' might, apparently, still fail. 
+ */ +#endif /* HARDWARE */ /* Choose the best filter to use and filter the row data */ PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr, @@ -2102,72 +1845,9 @@ PNG_INTERNAL_FUNCTION(void, png_image_free, (png_imagep image), PNG_EMPTY); #endif /* SIMPLIFIED READ/WRITE */ -/* These are initialization functions for hardware specific PNG filter - * optimizations; list these here then select the appropriate one at compile - * time using the macro PNG_FILTER_OPTIMIZATIONS. If the macro is not defined - * the generic code is used. - */ -#ifdef PNG_FILTER_OPTIMIZATIONS -PNG_INTERNAL_FUNCTION(void, PNG_FILTER_OPTIMIZATIONS, (png_structp png_ptr, - unsigned int bpp), PNG_EMPTY); - /* Just declare the optimization that will be used */ -#else - /* List *all* the possible optimizations here - this branch is required if - * the builder of libpng passes the definition of PNG_FILTER_OPTIMIZATIONS in - * CFLAGS in place of CPPFLAGS *and* uses symbol prefixing. - */ -# if PNG_ARM_NEON_OPT > 0 -PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_neon, - (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); -#endif - -#if PNG_MIPS_MSA_IMPLEMENTATION == 1 -PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_mips, - (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); -#endif - -# if PNG_MIPS_MMI_IMPLEMENTATION > 0 -PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_mips, - (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); -# endif - -# if PNG_INTEL_SSE_IMPLEMENTATION > 0 -PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2, - (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); -# endif -#endif - -#if PNG_LOONGARCH_LSX_OPT > 0 -PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_lsx, - (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); -#endif - PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr, png_const_charp key, png_bytep new_key), PNG_EMPTY); -#if PNG_ARM_NEON_IMPLEMENTATION == 1 -PNG_INTERNAL_FUNCTION(void, - 
png_riffle_palette_neon, - (png_structrp), - PNG_EMPTY); -PNG_INTERNAL_FUNCTION(int, - png_do_expand_palette_rgba8_neon, - (png_structrp, - png_row_infop, - png_const_bytep, - const png_bytepp, - const png_bytepp), - PNG_EMPTY); -PNG_INTERNAL_FUNCTION(int, - png_do_expand_palette_rgb8_neon, - (png_structrp, - png_row_infop, - png_const_bytep, - const png_bytepp, - const png_bytepp), - PNG_EMPTY); -#endif - /* Maintainer: Put new private prototypes here ^ */ #include "pngdebug.h" diff --git a/pngread.c b/pngread.c index 07a39df6e2..41016ff838 100644 --- a/pngread.c +++ b/pngread.c @@ -68,6 +68,15 @@ png_create_read_struct_2,(png_const_charp user_png_ver, png_voidp error_ptr, # endif # endif +# ifdef PNG_TARGET_CODE_IMPLEMENTATION /* target specific code */ + /* Current support is read-only so this happens here, not in the + * general creation. It could easily be moved. + */ + png_target_init(png_ptr); + if (png_ptr->target_state != 0U) + png_set_option(png_ptr, PNG_TARGET_SPECIFIC_CODE, 1); +# endif + /* TODO: delay this, it can be done in png_init_io (if the app doesn't * do it itself) avoiding setting the default function if it is not * required. 
@@ -998,10 +1007,10 @@ png_read_destroy(png_structrp png_ptr) png_ptr->chunk_list = NULL; #endif -#if defined(PNG_READ_EXPAND_SUPPORTED) && \ - defined(PNG_ARM_NEON_IMPLEMENTATION) - png_free(png_ptr, png_ptr->riffled_palette); - png_ptr->riffled_palette = NULL; +#ifdef PNG_TARGET_STORES_DATA + if (png_ptr->target_data != NULL) + png_target_free_data(png_ptr); + png_ptr->target_data = NULL; #endif /* NOTE: the 'setjmp' buffer may still be allocated and the memory and error diff --git a/pngrtran.c b/pngrtran.c index 1526123e02..4af9fe04ef 100644 --- a/pngrtran.c +++ b/pngrtran.c @@ -18,17 +18,6 @@ #include "pngpriv.h" -#ifdef PNG_ARM_NEON_IMPLEMENTATION -# if PNG_ARM_NEON_IMPLEMENTATION == 1 -# define PNG_ARM_NEON_INTRINSICS_AVAILABLE -# if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64) -# include -# else -# include -# endif -# endif -#endif - #ifdef PNG_READ_SUPPORTED /* Set the action on getting a CRC error for an ancillary or critical chunk. */ @@ -4309,8 +4298,8 @@ png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info, dp = row + ((size_t)row_width << 2) - 1; i = 0; -#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE - if (png_ptr->riffled_palette != NULL) +#ifdef PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE + if ((png_ptr->target_state & png_target_palette) != 0) { /* The RGBA optimization works with png_ptr->bit_depth == 8 * but sometimes row_info->bit_depth has been changed to 8. 
@@ -4346,7 +4335,7 @@ png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info, sp = row + (size_t)row_width - 1; dp = row + (size_t)(row_width * 3) - 1; i = 0; -#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE +#ifdef PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE i = png_do_expand_palette_rgb8_neon(png_ptr, row_info, row, &sp, &dp); #else @@ -4767,7 +4756,7 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info) { if (row_info->color_type == PNG_COLOR_TYPE_PALETTE) { -#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE +#ifdef PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8)) { if (png_ptr->riffled_palette == NULL) diff --git a/pngrutil.c b/pngrutil.c index d31dc21dae..e99a945279 100644 --- a/pngrutil.c +++ b/pngrutil.c @@ -376,7 +376,6 @@ png_inflate_claim(png_structrp png_ptr, png_uint_32 owner) #if ZLIB_VERNUM >= 0x1240 int window_bits = 0; -# if defined(PNG_SET_OPTION_SUPPORTED) && defined(PNG_MAXIMUM_INFLATE_WINDOW) if (((png_ptr->options >> PNG_MAXIMUM_INFLATE_WINDOW) & 3) == PNG_OPTION_ON) { @@ -388,8 +387,6 @@ png_inflate_claim(png_structrp png_ptr, png_uint_32 owner) { png_ptr->zstream_start = 1; } -# endif - #endif /* ZLIB_VERNUM >= 0x1240 */ /* Set this for safety, just in case the previous owner left pointers to @@ -4114,27 +4111,15 @@ png_init_filter_functions(png_structrp pp) pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth_multibyte_pixel; -#ifdef PNG_FILTER_OPTIMIZATIONS - /* To use this define PNG_FILTER_OPTIMIZATIONS as the name of a function to - * call to install hardware optimizations for the above functions; simply - * replace whatever elements of the pp->read_filter[] array with a hardware - * specific (or, for that matter, generic) optimization. - * - * To see an example of this examine what configure.ac does when - * --enable-arm-neon is specified on the command line. 
- */ - PNG_FILTER_OPTIMIZATIONS(pp, bpp); -#endif +# ifdef PNG_TARGET_IMPLEMENTS_FILTERS + png_target_init_filter_functions(pp, bpp); +# endif } void /* PRIVATE */ png_read_filter_row(png_structrp pp, png_row_infop row_info, png_bytep row, png_const_bytep prev_row, int filter) { - /* OPTIMIZATION: DO NOT MODIFY THIS FUNCTION, instead #define - * PNG_FILTER_OPTIMIZATIONS to a function that overrides the generic - * implementations. See png_init_filter_functions above. - */ if (filter > PNG_FILTER_VALUE_NONE && filter < PNG_FILTER_VALUE_LAST) { if (pp->read_filter[0] == NULL) @@ -4611,7 +4596,7 @@ defined(PNG_USER_TRANSFORM_PTR_SUPPORTED) png_ptr->big_prev_row = (png_bytep)png_malloc(png_ptr, row_bytes + 48); -#ifdef PNG_ALIGNED_MEMORY_SUPPORTED +#if PNG_TARGET_ROW_ALIGNMENT > 1 /* Use 16-byte aligned memory for row_buf with at least 16 bytes * of padding before and after row_buf; treat prev_row similarly. * NOTE: the alignment is to the start of the pixels, one beyond the start diff --git a/pngsimd.c b/pngsimd.c new file mode 100644 index 0000000000..7c1cb2f166 --- /dev/null +++ b/pngsimd.c @@ -0,0 +1,186 @@ +/* pngsimd.c - hardware (cpu/arch) specific code + * + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + * + * NOTE: this code is copied from libpng1.6 pngpriv.h. + */ +#include "pngpriv.h" + +#ifdef PNG_TARGET_CODE_IMPLEMENTATION +/* This is set by pngtarget.h iff there is some target code to be compiled. + */ + +/* Each piece of separate hardware support code must have a "init" file defined + * in PNG_TARGET_CODE_IMPLEMENTATION and included here. 
+ * + * The "check" header sets PNG_TARGET_CODE_IMPLEMENTATION and that file *MUST* + * supply macro definitions as follows. Note that all functions must be static + * to avoid clashes with other implementations. + * + * png_target_impl + * string constant + * REQUIRED + * This must be a string naming the implementation. + * + * png_target_free_data_impl + * static void png_target_free_data_impl(png_structrp) + * REQUIRED if PNG_TARGET_STORES_DATA is defined + * UNDEFINED if PNG_TARGET_STORES_DATA is not defined + * A function to free data stored in png_struct::target_data. + * + * png_target_init_filter_functions_impl + * OPTIONAL + * Contains code to overwrite the png_struct::read_filter array, see + * the definition of png_init_filter_functions. Need not be defined, + * only called if target_state contains png_target_filters. + * + * png_target_init_palette_support_impl + * static function + * OPTIONAL + * Contains code to initialize a palette transformation. This returns + * true if something has been set up. Only called if the state contains + * png_target_palette, need not be defined, may cancel the state flag + * in the png_struct to prevent further calls. + * + * png_target_do_expand_palette_impl + * static function + * OPTIONAL + * Handles palette expansion. Need not be defined, only called if the + * state contains png_target_palette, may set this flag to zero, may + * return false to indicate that the expansion was not done. + * + * Either png_target_init_filter_functions_impl or + * png_target_do_expand_palette_impl must be defined. 
+ */ + +/* This will fail in an obvious way with a meaningful error message if the file + * does not exist: + */ +#include PNG_TARGET_CODE_IMPLEMENTATION + +#ifndef png_target_impl +# error HARDWARE: PNG_TARGET_CODE_IMPLEMENTATION defined but not png_target_impl +#endif + +#if defined(PNG_TARGET_STORES_DATA) != defined(png_target_free_data_impl) +# error HARDWARE: PNG_TARGET_STORES_DATA !match png_target_free_data_impl +#endif + +#if !defined(png_target_init_filter_functions_impl) &&\ + !defined(png_target_do_expand_palette_impl) +# error HARDWARE: target specific code turned on but none provided +#endif + +void +png_target_init(png_structrp pp) +{ + /* Initialize png_struct::target_state if required. */ +# ifdef png_target_init_filter_functions_impl +# define F png_target_filters +# else +# define F 0U +# endif +# ifdef png_target_do_expand_palette_impl +# define P png_target_palette +# else +# define P 0U +# endif + +# if F|P + pp->target_state = F|P; +# else + PNG_UNUSED(pp); +# endif +} + +#ifdef PNG_TARGET_STORES_DATA +#ifndef png_target_free_data_impl +# error PNG_TARGET_STORES_DATA defined without implementation +#endif +void +png_target_free_data(png_structrp pp) +{ + /* Free any data allocated in the png_struct::target_data. 
+ */ + if (pp->target_data != NULL) + { + png_target_free_data_impl(pp); + if (pp->target_data != NULL) + png_error(pp, png_target_impl ": allocated data not released"); + } +} +#endif + +#ifdef PNG_TARGET_IMPLEMENTS_FILTERS +#ifndef png_target_init_filter_functions_impl +# error PNG_TARGET_IMPLEMENTS_FILTERS defined without implementation +#endif +void +png_target_init_filter_functions(png_structp pp, unsigned int bpp) +{ + if (((pp->options >> PNG_TARGET_SPECIFIC_CODE) & 3) == PNG_OPTION_ON && + (pp->target_state & png_target_filters) != 0) + png_target_init_filter_functions_impl(pp, bpp); +} +#endif /* filters */ + +#ifdef PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE +#ifndef png_target_init_palette_support_impl +# error PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE defined without implementation +#endif +void +png_target_init_palette_support(png_structrp pp) +{ + if (((pp->options >> PNG_TARGET_SPECIFIC_CODE) & 3) == PNG_OPTION_ON && + (pp->target_state & png_target_palette) != 0 && + !png_target_init_palette_support_impl(pp)) + pp->target_state &= ~png_target_palette; +} + +#ifndef png_target_do_expand_palette_impl +# error PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE defined without implementation +#endif +int +png_target_do_expand_palette(png_structrp pp, png_row_infop rip, + png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp) +{ + return (((pp->options >> PNG_TARGET_SPECIFIC_CODE) & 3) == PNG_OPTION_ON && + (pp->target_state & png_target_palette) != 0) ? + png_target_do_expand_palette_impl(pp, rip, row, ssp, ddp) : 0; +} +#endif /* palette */ + +/* + * png_target_init_impl + * Set the mask of png_target_support values to + * png_struct::target_state. If the value is non-0 hardware support + * will be recorded as enabled. + * + * png_target_free_data_impl + * Must be defined if the implementation stores data in + * png_struct::target_data. Need not be defined otherwise. 
+ * + * png_target_init_filter_functions_impl + * Contains code to overwrite the png_struct::read_filter array, see + * the definition of png_init_filter_functions. Need not be defined, + * only called if the state contains png_target_filters. + * + * png_target_init_palette_support_impl + * Contains code to initialize a palette transformation. This returns + * true if something has been set up. Only called if the state contains + * png_target_palette, need not be defined, may cancel the state flag + * in the png_struct to prevent further calls. + * + * png_target_do_expand_palette + * Handles palette expansion. Need not be defined, only called if the + * state contains png_target_palette, may set this flag to zero, may + * return false to indicate that the expansion was not done. + */ +#endif /* PNG_TARGET_ARCH */ diff --git a/pngstruct.h b/pngstruct.h index e591d94d58..f06d559c8a 100644 --- a/pngstruct.h +++ b/pngstruct.h @@ -342,9 +342,7 @@ struct png_struct_def #endif /* Options */ -#ifdef PNG_SET_OPTION_SUPPORTED png_uint_32 options; /* On/off state (up to 16 options) */ -#endif #if PNG_LIBPNG_VER < 10700 /* To do: remove this from libpng-1.7 */ @@ -382,12 +380,6 @@ struct png_struct_def /* deleted in 1.5.5: rgb_to_gray_blue_coeff; */ #endif -/* New member added in libpng-1.6.36 */ -#if defined(PNG_READ_EXPAND_SUPPORTED) && \ - defined(PNG_ARM_NEON_IMPLEMENTATION) - png_bytep riffled_palette; /* buffer for accelerated palette expansion */ -#endif - /* New member added in libpng-1.0.4 (renamed in 1.0.9) */ #if defined(PNG_MNG_FEATURES_SUPPORTED) /* Changed from png_byte to png_uint_32 at version 1.2.0 */ @@ -475,5 +467,19 @@ struct png_struct_def png_colorspace colorspace; #endif #endif + +/* New member added in libpng-1.6.36 */ +/* NOTE: prior to libpng-1.8 this also checked that PNG_ARM_NEON_IMPLEMENTATION + * is defined, however it was always defined... 
The code also checked that + * READ_EXPAND is supported but that will lead to bugs when some hardware + * implementation uses it for some other palette related thing. + * [[libpng-1.8]] changed to target_data for storing arbitrary data. + */ +#ifdef PNG_TARGET_CODE_IMPLEMENTATION /* file providing target specific code */ +# ifdef PNG_TARGET_STORES_DATA + png_voidp target_data; +# endif + png_uint_32 target_state; /* managed by libpng */ +#endif }; #endif /* PNGSTRUCT_H */ diff --git a/pngtarget.h b/pngtarget.h new file mode 100644 index 0000000000..502e1b38f8 --- /dev/null +++ b/pngtarget.h @@ -0,0 +1,129 @@ +/* pngtarget.h - target configuration file for libpng + * + * libpng version 1.6.44.git + * + * Copyright (c) 2024 John Bowler + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + * + * [[Added to libpng1.8]] + * + * This header file discovers whether the target machine has support for target + * (normally CPU) specific code such as SIMD instructions. It is included by + * pngpriv.h immediately after pnglibconf.h to establish compile-time (as + * opposed to configuration time) requirements for the build of libpng. + * + * The header only defines a very limited number of macros and it only defines + * macros; no functions are declared, no types etc. + * + * Every target architecture must have the following file: + * + * <arch>/check.h + * + * This file contains checks based on compiler flags to determine if + * target-specific code can be implemented for this architecture with this set + * of compiler options. Define + * + * PNG_TARGET_CODE_IMPLEMENTATION + * + * To the quoted relative path name of a single C file to include to obtain the + * implementation of the target specific code. For example: + * + * "arm/arm_init.c" + * "intel/intel_init.c" + * + * This file will be included by pngsimd.c so the string must be a valid + * relative path name from that file. 
See the file pngsimd.c for the definition + * of what the C file must do. + * + * When it defines PNG_TARGET_CODE_IMPLEMENTATION the check file may also + * define: + * + * PNG_TARGET_STORES_DATA + * If set a png_voidp pointer called "target_data" will be defined in + * pngstruct.h. The initialization code included in pngsimd.c must then + * also implement a function to free the data called png_target_free_data, + * see pngsimd.c. + * + * PNG_TARGET_ROW_ALIGNMENT + * If set this defines a power-of-2 required memory alignment for rows + * passed to the read "filter". If not set this defaults to 1. + * + * PNG_TARGET_IMPLEMENTS_FILTERS + * If defined this indicates to the system that target specific + * implementations of the read filters may be available. This must be set + * to cause a target specific filter implementation to be used. + * + * PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE + * If defined this indicates to the system that target specific + * code for png_do_expand_palette is available. This must be defined to + * cause such implementations to be used. + * + * It MUST NOT define these macros unless it also defines + * PNG_TARGET_CODE_IMPLEMENTATION. At least one of the 'IMPLEMENTS' macros must + * be defined; this file will produce an error diagnostic if not. + * + * If the check.h file needs to define other macros, for example for use in the + * PNG_TARGET_CODE_IMPLEMENTATION file macros must have the form: + * + * PNG_TARGET_<ARCH>_... + * + * Where ARCH is the architecture directory (the one containing check.h) in + * upper case. See pngsimd.c for more information about function definitions + * used to implement the code. 
+ */ +#ifndef PNGTARGET_H +#define PNGTARGET_H + +#ifdef PNG_TARGET_SPECIFIC_CODE_SUPPORTED /* from pnglibconf.h */ +# ifdef PNG_READ_SUPPORTED /* checked here as a convenience */ +# include "mips/check.h" +# include "powerpc/check.h" +# include "intel/check.h" +# include "arm/check.h" +#endif +#endif /* PNG_TARGET_SPECIFIC_CODE_SUPPORTED */ + +/* This is also a convenience to avoid checking in every check.h: */ +#ifndef PNG_READ_EXPAND_SUPPORTED +# undef PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE +#endif + +/* Now check the condition above. Note that these checks consider the composite + * result of all the above includes; if errors are preceded by warnings about + * redefinition of the macros those need to be fixed first. + */ +#ifdef PNG_TARGET_CODE_IMPLEMENTATION /* There is target-specific code */ +/* List all the supported target specific code types here: */ +# if !defined(PNG_TARGET_IMPLEMENTS_FILTERS) &&\ + !defined(PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE) +# error PNG_TARGET_CODE_IMPLEMENTATION without any implementations. +# endif /* PNG_TARGET_IMPLEMENTS_ check */ +/* Currently only row alignments which are a power of 2 and less than 17 are + * supported: the current code always aligns to 16 bytes (but may not in the + * future). + */ +# if defined(PNG_TARGET_ROW_ALIGNMENT) && (\ + PNG_TARGET_ROW_ALIGNMENT > 16 /*too big*/ ||\ + PNG_TARGET_ROW_ALIGNMENT !=\ + (PNG_TARGET_ROW_ALIGNMENT & -PNG_TARGET_ROW_ALIGNMENT)) /*!power of 2*/ +# error unsupported TARGET_ROW_ALIGNMENT +# endif /* PNG_TARGET_ROW_ALIGNMENT check */ +#endif /* Target specific code macro checks. */ + + +#ifndef PNG_TARGET_CODE_IMPLEMENTATION +# if defined(PNG_TARGET_STORES_DATA) ||\ + defined(PNG_TARGET_ROW_ALIGNMENT) ||\ + defined(PNG_TARGET_IMPLEMENTS_FILTERS) ||\ + defined(PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE) +# error PNG_TARGET_ macro defined without target specific code. +# endif /* Check PNG_TARGET_ macros are not defined. 
*/ +#endif /* PNG_TARGET_CODE_IMPLEMENTATION */ + +#ifndef PNG_TARGET_ROW_ALIGNMENT +# define PNG_TARGET_ROW_ALIGNMENT 1 +#endif +#endif /* PNGTARGET_H */ diff --git a/pngtest.c b/pngtest.c index ba2b7463eb..3400520bd3 100644 --- a/pngtest.c +++ b/pngtest.c @@ -36,6 +36,7 @@ #include #include #include +#define STDERR stdout #ifdef PNG_ZLIB_HEADER # include PNG_ZLIB_HEADER /* defined by pnglibconf.h from 1.7 */ diff --git a/powerpc/check.h b/powerpc/check.h new file mode 100644 index 0000000000..dc09d494b8 --- /dev/null +++ b/powerpc/check.h @@ -0,0 +1,22 @@ +/* powerpc/check.h - POWERPC optimised filter functions + * + * Copyright (c) 2018 Cosmin Truta + * Copyright (c) 2017 Glenn Randers-Pehrson + * Written by Vadim Barkov, 2017. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ +#if defined(__PPC64__) && defined(__ALTIVEC__) && defined(PNG_READ_SUPPORTED) + +#include + +#ifdef __VSX__ +# define PNG_TARGET_CODE_IMPLEMENTATION "powerpc/powerpc_init.c" + /* PNG_TARGET_STORES_DATA */ +# define PNG_TARGET_IMPLEMENTS_FILTERS + /* PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE */ + /* PNG_TARGET_ROW_ALIGNMENT */ +#endif /* __VSX__ */ +#endif /* __PPC64__ && __ALTIVEC__ && READ */ diff --git a/powerpc/filter_vsx_intrinsics.c b/powerpc/filter_vsx_intrinsics.c index 01cf8800dc..ee4edb08b0 100644 --- a/powerpc/filter_vsx_intrinsics.c +++ b/powerpc/filter_vsx_intrinsics.c @@ -9,27 +9,9 @@ * and license in png.h */ -#include -#include -#include "../pngpriv.h" - -#ifdef PNG_READ_SUPPORTED - -/* This code requires -maltivec and -mvsx on the command line: */ -#if PNG_POWERPC_VSX_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */ - -#include - -#if PNG_POWERPC_VSX_OPT > 0 - -#ifndef __VSX__ -# error "This code requires VSX support (POWER7 and later). Please provide -mvsx compiler flag." 
-#endif - #define vec_ld_unaligned(vec,data) vec = vec_vsx_ld(0,data) #define vec_st_unaligned(vec,data) vec_vsx_st(vec,0,data) - /* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d). * They're positioned like this: * prev: c b @@ -55,8 +37,9 @@ istop = 0;\ } -void png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { vector unsigned char rp_vec; vector unsigned char pp_vec; @@ -97,8 +80,7 @@ void png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row, *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff); rp++; } -} - + } } static const vector unsigned char VSX_LEFTSHIFTED1_4 = {16,16,16,16, 0, 1, 2, 3,16,16,16,16,16,16,16,16}; @@ -171,8 +153,9 @@ static const vector unsigned char VSX_SHORT_TO_CHAR4_3 = {16,16,16,16,16,16,16,1 # define vsx_abs(number) (number > 0) ? (number) : -(number) #endif -void png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { png_byte bpp = 4; @@ -228,8 +211,9 @@ void png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row, } -void png_read_filter_row_sub3_vsx(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_sub3_vsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { png_byte bpp = 3; @@ -292,8 +276,9 @@ void png_read_filter_row_sub3_vsx(png_row_infop row_info, png_bytep row, } } -void png_read_filter_row_avg4_vsx(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_avg4_vsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { png_byte bpp = 4; @@ -379,8 +364,9 @@ void png_read_filter_row_avg4_vsx(png_row_infop row_info, png_bytep row, } } -void 
png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) +static void +png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) { png_byte bpp = 3; @@ -497,7 +483,8 @@ void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row, *rp++ = (png_byte)a;\ } -void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row, +static void +png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row, png_const_bytep prev_row) { png_byte bpp = 4; @@ -617,7 +604,8 @@ void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row, } } -void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row, +static void +png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row, png_const_bytep prev_row) { png_byte bpp = 3; @@ -762,7 +750,3 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row, vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp) } } - -#endif /* PNG_POWERPC_VSX_OPT > 0 */ -#endif /* PNG_POWERPC_VSX_IMPLEMENTATION == 1 (intrinsics) */ -#endif /* READ */ diff --git a/powerpc/powerpc_init.c b/powerpc/powerpc_init.c index 54426c558e..2b4c7b6b5a 100644 --- a/powerpc/powerpc_init.c +++ b/powerpc/powerpc_init.c @@ -1,4 +1,3 @@ - /* powerpc_init.c - POWERPC optimised filter functions * * Copyright (c) 2018 Cosmin Truta @@ -9,102 +8,19 @@ * For conditions of distribution and use, see the disclaimer * and license in png.h */ +#define png_target_impl "powerpc-vsx" -/* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are - * called. - */ -#define _POSIX_SOURCE 1 - -#include -#include "../pngpriv.h" - -#ifdef PNG_READ_SUPPORTED - -#if PNG_POWERPC_VSX_OPT > 0 -#ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED /* Do run-time checks */ -/* WARNING: it is strongly recommended that you do not build libpng with - * run-time checks for CPU features if at all possible. 
In the case of the PowerPC - * VSX instructions there is no processor-specific way of detecting the - * presence of the required support, therefore run-time detection is extremely - * OS specific. - * - * You may set the macro PNG_POWERPC_VSX_FILE to the file name of file containing - * a fragment of C source code which defines the png_have_vsx function. There - * are a number of implementations in contrib/powerpc-vsx, but the only one that - * has partial support is contrib/powerpc-vsx/linux.c - a generic Linux - * implementation which reads /proc/cpufino. - */ -#ifndef PNG_POWERPC_VSX_FILE -# ifdef __linux__ -# define PNG_POWERPC_VSX_FILE "contrib/powerpc-vsx/linux_aux.c" -# endif -#endif - -#ifdef PNG_POWERPC_VSX_FILE - -#include /* for sig_atomic_t */ -static int png_have_vsx(png_structp png_ptr); -#include PNG_POWERPC_VSX_FILE - -#else /* PNG_POWERPC_VSX_FILE */ -# error "PNG_POWERPC_VSX_FILE undefined: no support for run-time POWERPC VSX checks" -#endif /* PNG_POWERPC_VSX_FILE */ -#endif /* PNG_POWERPC_VSX_CHECK_SUPPORTED */ +#include +#include "filter_vsx_intrinsics.c" void png_init_filter_functions_vsx(png_structp pp, unsigned int bpp) { - /* The switch statement is compiled in for POWERPC_VSX_API, the call to - * png_have_vsx is compiled in for POWERPC_VSX_CHECK. If both are defined - * the check is only performed if the API has not set the PowerPC option on - * or off explicitly. In this case the check controls what happens. - */ - -#ifdef PNG_POWERPC_VSX_API_SUPPORTED - switch ((pp->options >> PNG_POWERPC_VSX) & 3) - { - case PNG_OPTION_UNSET: - /* Allow the run-time check to execute if it has been enabled - - * thus both API and CHECK can be turned on. If it isn't supported - * this case will fall through to the 'default' below, which just - * returns. 
- */ -#endif /* PNG_POWERPC_VSX_API_SUPPORTED */ -#ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED - { - static volatile sig_atomic_t no_vsx = -1; /* not checked */ - - if (no_vsx < 0) - no_vsx = !png_have_vsx(pp); - - if (no_vsx) - return; - } -#ifdef PNG_POWERPC_VSX_API_SUPPORTED - break; -#endif -#endif /* PNG_POWERPC_VSX_CHECK_SUPPORTED */ - -#ifdef PNG_POWERPC_VSX_API_SUPPORTED - default: /* OFF or INVALID */ - return; - - case PNG_OPTION_ON: - /* Option turned on */ - break; - } -#endif - - /* IMPORTANT: any new internal functions used here must be declared using - * PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the - * 'prefix' option to configure works: + /* IMPORTANT: DO NOT DEFINE EXTERNAL FUNCTIONS HERE * - * ./configure --with-libpng-prefix=foobar_ - * - * Verify you have got this right by running the above command, doing a build - * and examining pngprefix.h; it must contain a #define for every external - * function you add. (Notice that this happens automatically for the - * initialization function.) + * This is because external functions must be declared with + * PNG_INTERNAL_FUNCTION in pngpriv.h; without this the PNG_PREFIX option to + * the build will not work (it will not know about these symbols). 
*/ pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_vsx; @@ -122,5 +38,5 @@ png_init_filter_functions_vsx(png_structp pp, unsigned int bpp) pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_vsx; } } -#endif /* PNG_POWERPC_VSX_OPT > 0 */ -#endif /* READ */ + +#define png_target_init_filter_functions_impl png_init_filter_functions_vsx diff --git a/scripts/pnglibconf.dfa b/scripts/pnglibconf.dfa index fe8e481238..59ea5568b4 100644 --- a/scripts/pnglibconf.dfa +++ b/scripts/pnglibconf.dfa @@ -197,142 +197,6 @@ setting API_RULE default 0 setting PREFIX -# Implementation specific control of the optimizations, enabled by those -# hardware or software options that need it (typically when run-time choices -# must be made by the user) -option SET_OPTION disabled - -# These options are specific to the ARM NEON hardware optimizations. At present -# these optimizations depend on GCC specific pre-processing of an assembler (.S) -# file, so they probably won't work with other compilers. -# -# ARM_NEON_OPT: -# unset: check at compile time -# (__ARM_NEON__ must be predefined by the compiler, as a result of -# passing "-mfpu=neon" to the compiler options) -# 0: disable (even if the CPU has a NEON FPU) -# 1: check at run time (via ARM_NEON_{API,CHECK}) -# 2: switch on unconditionally -# (inadvisable - instead, pass "-mfpu=neon" to the compiler) -# NOTE: -# When building libpng, avoid using any setting other than '0'; -# '1' is set automatically when either 'API' or 'CHECK' are configured in; -# '2' should not be necessary, as "-mfpu=neon" will achieve the same effect -# as well as applying the NEON optimizations to the rest of libpng. -# NOTE: -# Any setting other than '0' requires ALIGNED_MEMORY. -# -# ARM_NEON_API: -# (PNG_ARM_NEON == 1) -# Allow the optimization to be switched on with png_set_option. -# -# ARM_NEON_CHECK: -# (PNG_ARM_NEON == 1) -# Compile a run-time check to see if Neon extensions are supported. 
-# This is poorly supported and deprecated - use the png_set_option API. -# -setting ARM_NEON_OPT -option ARM_NEON_API disabled requires ALIGNED_MEMORY enables SET_OPTION, - sets ARM_NEON_OPT 1 -option ARM_NEON_CHECK disabled requires ALIGNED_MEMORY, - sets ARM_NEON_OPT 1 - -# These options are specific to the PowerPC VSX hardware optimizations. -# -# POWERPC_VSX_OPT: -# unset: check at compile time -# (__PPC64__,__ALTIVEC__,__VSX__ must be predefined by the compiler, -# as a result of passing "-mvsx -maltivec" to the compiler options) -# 0: disable (even if the CPU supports VSX) -# 1: check at run time (via POWERPC_VSX_{API,CHECK}) -# 2: switch on unconditionally -# (inadvisable - instead, pass "-mvsx -maltivec" to the compiler) -# NOTE: -# When building libpng, avoid using any setting other than '0'; -# '1' is set automatically when either 'API' or 'CHECK' are configured in; -# '2' should not be necessary, as "-mvsx -maltivec" will achieve the same -# effect as well as applying the VSX optimizations to the rest of libpng. -# -# POWERPC_VSX_API: -# (PNG_POWERPC_VSX == 1) -# Allow the optimization to be switched on with png_set_option. -# -# POWERPC_VSX_CHECK: -# (PNG_POWERPC_VSX == 1) -# Compile a run-time check to see if VSX extensions are supported. -# This is not supported on all systems. See contrib/powerpc-vsx/README. -# -setting POWERPC_VSX_OPT -option POWERPC_VSX_API disabled enables SET_OPTION, - sets POWERPC_VSX_OPT 1 -option POWERPC_VSX_CHECK disabled, - sets POWERPC_VSX_OPT 1 - -# These options are specific to the MIPS MSA hardware optimizations. 
-# -# MIPS_MSA_OPT: -# unset: check at compile time -# (__mips_msa must be predefined by the compiler, as a result of -# passing "-mmsa -mfp64" to the compiler options) -# 0: disable (even if the CPU supports MSA) -# 1: check at run time (via MIPS_MSA_{API,CHECK}) -# 2: switch on unconditionally -# (inadvisable - instead, pass "-mmsa -mfp64" to the compiler) -# NOTE: -# When building libpng, avoid using any setting other than '0'; -# '1' is set automatically when either 'API' or 'CHECK' are configured in; -# '2' should not be necessary, as "-mmsa -mfp64" will achieve the same -# effect as well as applying the MSA optimizations to the rest of libpng. -# NOTE: -# Any setting other than '0' requires ALIGNED_MEMORY. -# -# MIPS_MSA_API: -# (PNG_MIPS_MSA == 1) -# Allow the optimization to be switched on with png_set_option. -# -# MIPS_MSA_CHECK: -# (PNG_MIPS_MSA == 1) -# Compile a run-time check to see if MSA extensions are supported. -# -setting MIPS_MSA_OPT -option MIPS_MSA_API disabled requires ALIGNED_MEMORY enables SET_OPTION, - sets MIPS_MSA_OPT 1 -option MIPS_MSA_CHECK disabled requires ALIGNED_MEMORY, - sets MIPS_MSA_OPT 1 - -# These options are specific to the MIPS MMI hardware optimizations. -# -# MIPS_MMI_OPT: -# unset: check at compile time -# (__mips_loongson_mmi must be defined by the compiler, as a result of -# passing "-mloongson-mmi -march=loongson3a" to the compiler options) -# 0: disable (even if the CPU supports MMI) -# 1: check at run time (via MIPS_MMI_{API,CHECK}) -# 2: switch on unconditionally -# (inadvisable - instead, pass "-mloongson-mmi -march=loongson3a" to the -# compiler) -# NOTE: -# When building libpng, avoid using any setting other than '0'; -# '1' is set automatically when either 'API' or 'CHECK' are configured in; -# '2' should not be necessary, as "-mloongson-mmi -march=loongson3a" will -# achieve the same effect as well as applying the MMI optimizations to the -# rest of libpng. 
-# -# MIPS_MMI_API: -# (PNG_MIPS_MMI == 1) -# Allow the optimization to be switched on with png_set_option. -# -# MIPS_MMI_CHECK: -# (PNG_MIPS_MMI == 1) -# Compile a run-time check to see if MMI extensions are supported. -# -setting MIPS_MMI_OPT -option MIPS_MMI_API disabled requires ALIGNED_MEMORY enables SET_OPTION, - sets MIPS_MMI_OPT 1 -option MIPS_MMI_CHECK disabled requires ALIGNED_MEMORY, - sets MIPS_MMI_OPT 1 - - # These settings configure the default compression level (0-9) and 'strategy'; # strategy is as defined by the implementors of zlib. It describes the input # data and modifies the zlib parameters in an attempt to optimize the balance @@ -432,7 +296,7 @@ option BENIGN_READ_ERRORS requires BENIGN_ERRORS # Furthermore the option is explicitly turned off here if the zlib version # number is below that required - libpng wouldn't compile in that case if the # option were turned on. -option DISABLE_ADLER32_CHECK requires READ enables SET_OPTION disabled +option DISABLE_ADLER32_CHECK requires READ disabled # ZLIB_VERNUM must be used here, not PNG_ZLIB_VERNUM, because # scripts/options.awk ends up putting this test adhead of the setting of @@ -544,7 +408,7 @@ option SET_USER_LIMITS requires USER_LIMITS # to libpng 1.6; the new interfaces in 1.6 will take several years to become # popular. -option READ enables READ_INTERLACING SET_OPTION +option READ enables READ_INTERLACING # Disabling READ_16BIT does not disable reading 16-bit PNG files, but it # forces them to be chopped down to 8-bit, and disables any 16-bit @@ -756,13 +620,6 @@ option COLORSPACE enables GAMMA disabled setting sRGB_PROFILE_CHECKS default 2 -# Artificially align memory - the code typically aligns to 8 byte -# boundaries if this is switched on, it's a small waste of space -# but can help (in theory) on some architectures. Only affects -# internal structures. 
Added at libpng 1.4.0 - -option ALIGNED_MEMORY - # Buggy compilers (e.g., gcc 2.7.2.2) need PNG_NO_POINTER_INDEXING # See png[wr]util.c, normally this should always be *on* @@ -843,6 +700,16 @@ setting INFLATE_BUF_SIZE default 1024 setting IDAT_READ_SIZE default PNG_ZBUF_SIZE +# Target specific code. By default libpng will use "target specific" code if +# available. This means code that depends on particular capabilities of a CPU +# and its instruction set. This configuration option is provided to allow +# such code to be completely disabled. See pngsimd.c for more discussion about +# the advantages and disadvantages of target specific code. +# +# At present target specific code is available only for read operations, so: + +option TARGET_SPECIFIC_CODE requires READ + # Ancillary chunks chunk bKGD chunk cHRM enables COLORSPACE @@ -857,7 +724,7 @@ chunk pHYs chunk sBIT chunk sCAL chunk sPLT -chunk sRGB enables COLORSPACE, GAMMA, SET_OPTION +chunk sRGB enables COLORSPACE, GAMMA chunk tEXt requires TEXT chunk tIME chunk tRNS