summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
author Gabe Black <gabeblack@chromium.org> 2011-10-18 17:30:00 -0700
committer Gabe Black <gabeblack@chromium.org> 2011-10-20 17:49:20 -0700
commit d69f2460c5db7d74b8431d4f1c373b6bd1b7e38a (patch)
tree 6245162628f479fb72eddec588f57db42669cb52 /arch
parent 697c0549797382605805db6f31d307b9174a0b96 (diff)
Import the glibc implementation of x86 memset into u-boot
I ran four iterations with each of the two implementations and used Vadim's CBMEM infrastructure to measure the time they took. All times are in microseconds, and the timestamp portion of the raw output of cbmem.py is included in the bug. The new implementation is about twice as fast as the old.

Old:
1. 418,286
2. 418,302
3. 418,298
4. 418,290

New:
1. 184,800
2. 194,629
3. 194,188
4. 192,718

BUG=chrome-os-partner:6487
TEST=Booted on Stumpy.

Change-Id: Iba398929cbba395e10851d676ae9d356ae670f41
Signed-off-by: Gabe Black <gabeblack@google.com>
Reviewed-on: http://gerrit.chromium.org/gerrit/10284
Reviewed-by: Mike Frysinger <vapier@chromium.org>
Reviewed-by: Simon Glass <sjg@chromium.org>
Commit-Ready: Gabe Black <gabeblack@chromium.org>
Tested-by: Gabe Black <gabeblack@chromium.org>
Diffstat (limited to 'arch')
-rw-r--r-- arch/x86/include/asm/string.h 2
-rw-r--r-- arch/x86/lib/Makefile 1
-rw-r--r-- arch/x86/lib/string.c 87
3 files changed, 89 insertions, 1 deletions
diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h
index 3643a79fdf..3aa6c1131b 100644
--- a/arch/x86/include/asm/string.h
+++ b/arch/x86/include/asm/string.h
@@ -23,7 +23,7 @@ extern void * memmove(void *, const void *, __kernel_size_t);
#undef __HAVE_ARCH_MEMCHR
extern void * memchr(const void *, int, __kernel_size_t);
-#undef __HAVE_ARCH_MEMSET
+#define __HAVE_ARCH_MEMSET
extern void * memset(void *, int, __kernel_size_t);
#undef __HAVE_ARCH_MEMZERO
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 8b0ac74dca..9ecf16069e 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -39,6 +39,7 @@ COBJS-$(CONFIG_SYS_GENERIC_TIMER) += pcat_timer.o
COBJS-$(CONFIG_PCI) += pci.o
COBJS-$(CONFIG_PCI) += pci_type1.o
COBJS-y += realmode.o
+COBJS-y += string.o
COBJS-y += timer.o
COBJS-$(CONFIG_VGA_CONSOLE) += video_bios.o
COBJS-$(CONFIG_VGA_CONSOLE) += video.o
diff --git a/arch/x86/lib/string.c b/arch/x86/lib/string.c
new file mode 100644
index 0000000000..13461732b3
--- /dev/null
+++ b/arch/x86/lib/string.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 1991,1992,1993,1997,1998,2003, 2005 Free Software Foundation, Inc.
+ * This file is part of the GNU C Library.
+ * Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+/* From glibc-2.14, sysdeps/i386/memset.c */
+
+#include <compiler.h>
+#include <asm/string.h>
+#include <linux/types.h>
+
+typedef uint32_t op_t;
+
+void *memset(void *dstpp, int c, size_t len)
+{
+ int d0; /* sink for the count register each asm leaves behind; never read */
+ unsigned long int dstp = (unsigned long int) dstpp;
+
+ /*
+ * Overview: store the byte (unsigned char) C into LEN bytes starting at
+ * DSTPP and return DSTPP. Fills of 12 bytes or more are done in three
+ * steps: single bytes up to a 4-byte boundary, whole 32-bit words, then
+ * the leftover bytes. Shorter fills go through the final byte step only.
+ * DSTP is the running destination address; every asm statement below
+ * takes it in and hands back the advanced value.
+ *
+ * X is pinned to the accumulator because the "rep stos" instructions
+ * store from that register (it is passed with the "a" constraint below).
+ * From glibc: this explicit register allocation improves code very much
+ * indeed.
+ */
+ register op_t x asm("ax");
+
+ x = (unsigned char) c;
+
+ /*
+ * Clear the direction flag, so filling will move forward: the string
+ * stores below then advance the destination address after each store
+ * instead of moving it backwards.
+ */
+ asm volatile("cld");
+
+ /*
+ * Below 12 bytes the align/word/tail split is skipped and all bytes are
+ * written by the final statement alone. The glibc comment calls this
+ * threshold value optimal; no measurement backing that is visible here.
+ */
+ if (len >= 12) {
+ /*
+ * Fill X with four copies of the char we want to fill with,
+ * e.g. 0x000000ab becomes 0xabababab.
+ */
+ x |= (x << 8);
+ x |= (x << 16);
+
+ /*
+ * Adjust LEN for the bytes handled in the first loop.
+ * (-DSTP) % sizeof(op_t) is the distance, 0..3, from DSTP up to the
+ * next 4-byte boundary. LEN is at least 12 here, so the subtraction
+ * cannot wrap around.
+ */
+ len -= (-dstp) % sizeof(op_t);
+
+ /*
+ * There are at least some bytes to set. No need to test for
+ * LEN == 0 in this alignment loop. (The count given to the
+ * alignment fill itself is 0 when DSTP is already aligned; "rep"
+ * with a zero count stores nothing.)
+ */
+
+ /*
+ * Fill bytes until DSTP is aligned on a longword boundary.
+ *
+ * Operand layout, shared by all three asm statements: "=D" with the
+ * tied input "0" keeps DSTP in the destination-index register on the
+ * way in and out; "=c" with the tied input "1" loads the repeat count
+ * and dumps whatever is left of it into D0; "a" supplies X. The
+ * "memory" clobber tells the compiler that memory is written.
+ *
+ * NOTE(review): the count inputs are size_t-typed while the tied
+ * output D0 is an int. The widths match on the 32-bit x86 target this
+ * was written for (sysdeps/i386) -- confirm before building it for a
+ * 64-bit target.
+ */
+ asm volatile(
+ "rep\n"
+ "stosb" /* %0, %2, %3 */ :
+ "=D" (dstp), "=c" (d0) :
+ "0" (dstp), "1" ((-dstp) % sizeof(op_t)), "a" (x) :
+ "memory");
+
+ /*
+ * Fill longwords: LEN / sizeof(op_t) stores of the full 32-bit X.
+ * The remainder, 0..3 bytes, is left in LEN for the byte fill below.
+ */
+ asm volatile(
+ "rep\n"
+ "stosl" /* %0, %2, %3 */ :
+ "=D" (dstp), "=c" (d0) :
+ "0" (dstp), "1" (len / sizeof(op_t)), "a" (x) :
+ "memory");
+ len %= sizeof(op_t);
+ }
+
+ /*
+ * Write the last few bytes. This runs for every LEN: with the whole
+ * LEN when it was below 12 (X then carries the fill byte only in its
+ * low 8 bits, which is all a byte store uses), otherwise with the 0..3
+ * bytes left over after the longword fill.
+ */
+ asm volatile(
+ "rep\n"
+ "stosb" /* %0, %2, %3 */ :
+ "=D" (dstp), "=c" (d0) :
+ "0" (dstp), "1" (len), "a" (x) :
+ "memory");
+
+ return dstpp;
+}