summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMax Krummenacher <max.oss.09@gmail.com>2015-11-10 20:05:42 +0100
committerMax Krummenacher <max.oss.09@gmail.com>2015-11-10 20:05:42 +0100
commite474eb8a4b6e9651052fe7eb86bfd5826c052c2c (patch)
treefd65ef0d17abf7f8d625307b79b6377504ab21e5
parent606e603146985d68e805e992c7e5e66f0636ae47 (diff)
glibc_2.21: force memcpy_arm to improve memcpy performance
A meta-toolchain build, or a build which does not use the Angstrom distro, builds glibc_2.21 rather than glibc_linaro-2.20. The patch that disables the slower NEON memcpy implementation does not apply cleanly to both versions of glibc, so this adds a bbappend for glibc_2.21 that forces memcpy_arm.
-rw-r--r--recipes-core/glibc/glibc-2.21/0001-memcpy-don-t-use-optimized-for-VFP-NEON-versions.patch127
-rw-r--r--recipes-core/glibc/glibc_2.21.bbappend2
2 files changed, 129 insertions, 0 deletions
diff --git a/recipes-core/glibc/glibc-2.21/0001-memcpy-don-t-use-optimized-for-VFP-NEON-versions.patch b/recipes-core/glibc/glibc-2.21/0001-memcpy-don-t-use-optimized-for-VFP-NEON-versions.patch
new file mode 100644
index 0000000..dd4964b
--- /dev/null
+++ b/recipes-core/glibc/glibc-2.21/0001-memcpy-don-t-use-optimized-for-VFP-NEON-versions.patch
@@ -0,0 +1,127 @@
+From a086ff98ad0bb996241037689188caf394f6c633 Mon Sep 17 00:00:00 2001
+From: Max Krummenacher <max.krummenacher@toradex.com>
+Date: Fri, 12 Jun 2015 13:27:55 +0200
+Subject: [PATCH] memcpy: don't use optimized for VFP/NEON versions
+
+Tests with the tinymembench tool on a Colibri T30 show that the standard
+ARM-based memcpy is around 2 times faster than __memcpy_neon or
+__memcpy_vfp.
+
+Tests on Apalis iMX6 confirm this, although there the ARM memcpy is only around 1.3 times faster.
+---
+ sysdeps/arm/armv7/multiarch/Makefile | 3 ---
+ sysdeps/arm/armv7/multiarch/ifunc-impl-list.c | 18 ------------------
+ sysdeps/arm/armv7/multiarch/memcpy.S | 17 -----------------
+ sysdeps/arm/armv7/multiarch/memcpy_neon.S | 9 ---------
+ sysdeps/arm/armv7/multiarch/memcpy_vfp.S | 7 -------
+ 5 files changed, 54 deletions(-)
+
+diff --git a/sysdeps/arm/armv7/multiarch/Makefile b/sysdeps/arm/armv7/multiarch/Makefile
+index e834cc9..e69de29 100644
+--- a/sysdeps/arm/armv7/multiarch/Makefile
++++ b/sysdeps/arm/armv7/multiarch/Makefile
+@@ -1,3 +0,0 @@
+-ifeq ($(subdir),string)
+-sysdep_routines += memcpy_neon memcpy_vfp
+-endif
+diff --git a/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c b/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
+index 2515418..322eae6 100644
+--- a/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
++++ b/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
+@@ -31,25 +31,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ {
+ size_t i = 0;
+
+- bool use_neon = true;
+-#ifdef __ARM_NEON__
+-# define __memcpy_neon memcpy
+-#else
+- use_neon = (GLRO(dl_hwcap) & HWCAP_ARM_NEON) != 0;
+-#endif
+-
+-#ifndef __ARM_NEON__
+- bool use_vfp = true;
+-# ifdef __SOFTFP__
+- use_vfp = (GLRO(dl_hwcap) & HWCAP_ARM_VFP) != 0;
+-# endif
+-#endif
+-
+ IFUNC_IMPL (i, name, memcpy,
+- IFUNC_IMPL_ADD (array, i, memcpy, use_neon, __memcpy_neon)
+-#ifndef __ARM_NEON__
+- IFUNC_IMPL_ADD (array, i, memcpy, use_vfp, __memcpy_vfp)
+-#endif
+ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_arm));
+
+ return i;
+diff --git a/sysdeps/arm/armv7/multiarch/memcpy.S b/sysdeps/arm/armv7/multiarch/memcpy.S
+index c4f4e80..9ee4d73 100644
+--- a/sysdeps/arm/armv7/multiarch/memcpy.S
++++ b/sysdeps/arm/armv7/multiarch/memcpy.S
+@@ -23,37 +23,20 @@
+ #include <rtld-global-offsets.h>
+
+ #if IS_IN (libc)
+-/* Under __ARM_NEON__, memcpy_neon.S defines the name memcpy. */
+-# ifndef __ARM_NEON__
+ .text
+ ENTRY(memcpy)
+ .type memcpy, %gnu_indirect_function
+-# ifdef __SOFTFP__
+ ldr r1, .Lmemcpy_arm
+- tst r0, #HWCAP_ARM_VFP
+- ldrne r1, .Lmemcpy_vfp
+-# else
+- ldr r1, .Lmemcpy_vfp
+-# endif
+- tst r0, #HWCAP_ARM_NEON
+- ldrne r1, .Lmemcpy_neon
+ 1:
+ add r0, r1, pc
+ DO_RET(lr)
+
+-# ifdef __SOFTFP__
+ .Lmemcpy_arm:
+ .long C_SYMBOL_NAME(__memcpy_arm) - 1b - PC_OFS
+-# endif
+-.Lmemcpy_neon:
+- .long C_SYMBOL_NAME(__memcpy_neon) - 1b - PC_OFS
+-.Lmemcpy_vfp:
+- .long C_SYMBOL_NAME(__memcpy_vfp) - 1b - PC_OFS
+
+ END(memcpy)
+
+ libc_hidden_builtin_def (memcpy)
+-#endif /* Not __ARM_NEON__. */
+
+ /* These versions of memcpy are defined not to clobber any VFP or NEON
+ registers so they must always call the ARM variant of the memcpy code. */
+diff --git a/sysdeps/arm/armv7/multiarch/memcpy_neon.S b/sysdeps/arm/armv7/multiarch/memcpy_neon.S
+index e60d1cc..e69de29 100644
+--- a/sysdeps/arm/armv7/multiarch/memcpy_neon.S
++++ b/sysdeps/arm/armv7/multiarch/memcpy_neon.S
+@@ -1,9 +0,0 @@
+-#ifdef __ARM_NEON__
+-/* Under __ARM_NEON__, this file defines memcpy directly. */
+-libc_hidden_builtin_def (memcpy)
+-#else
+-# define memcpy __memcpy_neon
+-#endif
+-
+-#define MEMCPY_NEON
+-#include "memcpy_impl.S"
+diff --git a/sysdeps/arm/armv7/multiarch/memcpy_vfp.S b/sysdeps/arm/armv7/multiarch/memcpy_vfp.S
+index e008c04..e69de29 100644
+--- a/sysdeps/arm/armv7/multiarch/memcpy_vfp.S
++++ b/sysdeps/arm/armv7/multiarch/memcpy_vfp.S
+@@ -1,7 +0,0 @@
+-/* Under __ARM_NEON__, memcpy_neon.S defines memcpy directly
+- and the __memcpy_vfp code will never be used. */
+-#ifndef __ARM_NEON__
+-# define MEMCPY_VFP
+-# define memcpy __memcpy_vfp
+-# include "memcpy_impl.S"
+-#endif
+--
+1.9.3
+
diff --git a/recipes-core/glibc/glibc_2.21.bbappend b/recipes-core/glibc/glibc_2.21.bbappend
new file mode 100644
index 0000000..244841d
--- /dev/null
+++ b/recipes-core/glibc/glibc_2.21.bbappend
@@ -0,0 +1,2 @@
+FILESEXTRAPATHS_prepend := "${THISDIR}/glibc-2.21:"
+SRC_URI_append = " file://0001-memcpy-don-t-use-optimized-for-VFP-NEON-versions.patch"