·

first benchmarks. Don't really make sense
2024-09-15 19:24:26 +02:00 · 2024-09-03 12:59:26 +02:00
13 changed files with 1534 additions and 0 deletions
--- a/benchies.md
+++ b/benchies.md
@ -0,0 +1,17 @@
+app_kernel: runs in 0 ns
+cmsis_dsp/basicmath: runs in 0 ns
+data_structure_perf/dlist_perf: ""
+data_structure_perf/rbtree_perf: ""
+footprints: runs at least 7 minutes without output
+latency_measures: 0ns
+mbedtls: at least 3mins without output
+sched: 0ns and doesn't exit
+sched_userspace: doesn't compile
+sys_kernel: 0 ns and no exit
+
+linpack:
+LTO && CFI   -> ~ 4000000 KFLOPS
+LTO && !CFI  -> ~ 3650000 KFLOPS
+!LTO && !CFI -> ~ 3650000 KFLOPS
+
+mibench -> not implemented for Zephyr, and there is no reason to expect the results to differ from linpack
--- a/benchies/embench/CMakeLists.txt
+++ b/benchies/embench/CMakeLists.txt
@ -0,0 +1,7 @@
+# SPDX-License-Identifier: Apache-2.0
+
+cmake_minimum_required(VERSION 3.20.0)
+find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})
+project(blinky)
+
+target_sources(app PRIVATE src/main.c src/bench.c)
--- a/benchies/embench/README.rst
+++ b/benchies/embench/README.rst
@ -0,0 +1,97 @@
+.. zephyr:code-sample:: blinky
+   :name: Blinky
+   :relevant-api: gpio_interface
+
+   Blink an LED forever using the GPIO API.
+
+Overview
+********
+
+The Blinky sample blinks an LED forever using the :ref:`GPIO API <gpio_api>`.
+
+The source code shows how to:
+
+#. Get a pin specification from the :ref:`devicetree <dt-guide>` as a
+   :c:struct:`gpio_dt_spec`
+#. Configure the GPIO pin as an output
+#. Toggle the pin forever
+
+See :zephyr:code-sample:`pwm-blinky` for a similar sample that uses the PWM API instead.
+
+.. _blinky-sample-requirements:
+
+Requirements
+************
+
+Your board must:
+
+#. Have an LED connected via a GPIO pin (these are called "User LEDs" on many of
+   Zephyr's :ref:`boards`).
+#. Have the LED configured using the ``led0`` devicetree alias.
+
+Building and Running
+********************
+
+Build and flash Blinky as follows, changing ``reel_board`` for your board:
+
+.. zephyr-app-commands::
+   :zephyr-app: samples/basic/blinky
+   :board: reel_board
+   :goals: build flash
+   :compact:
+
+After flashing, the LED starts to blink and messages with the current LED state
+are printed on the console. If a runtime error occurs, the sample exits without
+printing to the console.
+
+Build errors
+************
+
+You will see a build error at the source code line defining the ``struct
+gpio_dt_spec led`` variable if you try to build Blinky for an unsupported
+board.
+
+On GCC-based toolchains, the error looks like this:
+
+.. code-block:: none
+
+   error: '__device_dts_ord_DT_N_ALIAS_led_P_gpios_IDX_0_PH_ORD' undeclared here (not in a function)
+
+Adding board support
+********************
+
+To add support for your board, add something like this to your devicetree:
+
+.. code-block:: DTS
+
+   / {
+   	aliases {
+   		led0 = &myled0;
+   	};
+
+   	leds {
+   		compatible = "gpio-leds";
+   		myled0: led_0 {
+   			gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
+                };
+   	};
+   };
+
+The above sets your board's ``led0`` alias to use pin 13 on GPIO controller
+``gpio0``. The pin flags :c:macro:`GPIO_ACTIVE_LOW` mean the LED is on when
+the pin is set to its low state, and off when the pin is in its high state.
+
+Tips:
+
+- See :dtcompatible:`gpio-leds` for more information on defining GPIO-based LEDs
+  in devicetree.
+
+- If you're not sure what to do, check the devicetrees for supported boards which
+  use the same SoC as your target. See :ref:`get-devicetree-outputs` for details.
+
+- See :zephyr_file:`include/zephyr/dt-bindings/gpio/gpio.h` for the flags you can use
+  in devicetree.
+
+- If the LED is built in to your board hardware, the alias should be defined in
+  your :ref:`BOARD.dts file <devicetree-in-out-files>`. Otherwise, you can
+  define one in a :ref:`devicetree overlay <set-devicetree-overlays>`.
--- a/benchies/embench/prj.conf
+++ b/benchies/embench/prj.conf
@ -0,0 +1,9 @@
+CONFIG_GPIO=y
+#CONFIG_ASAN=y
+#CONFIG_CFI=y
+CONFIG_LLVM_USE_LLD=y
+#CONFIG_LTO=y
+
+#CONFIG_DEBUG=y
+#CONFIG_DEBUG_INFO=y
+#CONFIG_DEBUG_OPTIMIZATIONS=y
--- a/benchies/embench/sample.yaml
+++ b/benchies/embench/sample.yaml
@ -0,0 +1,12 @@
+sample:
+  name: Blinky Sample
+tests:
+  sample.basic.blinky:
+    tags:
+      - LED
+      - gpio
+    filter: dt_enabled_alias_with_parent_compat("led0", "gpio-leds")
+    depends_on: gpio
+    harness: led
+    integration_platforms:
+      - frdm_k64f
--- a/benchies/embench/src/bench.c
+++ b/benchies/embench/src/bench.c
@ -0,0 +1,244 @@
+/*
+ * Simple MD5 implementation
+ * by Creationix
+ * https://gist.github.com/creationix/4710780
+ * Licensed under MIT
+ *
+ * modified by Julian Kunkel for Embench-iot
+ * Compile with: gcc -o md5 -O3 -lm md5.c
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "support.h"
+
+#define LOCAL_SCALE_FACTOR 51
+
+/* BEEBS heap is just an array */
+/* MSG_SIZE * 2 + ((((MSG_SIZE+8)/64 + 1) * 64) - 8) + 64 */
+#define HEAP_SIZE (2000 + 1016 + 64)
+#define MSG_SIZE 1000
+/* Result obtained with a single run on the native target on x86 with a MSG_SIZE
+ * of 1000 and a msg initiated incrementally from 0 to 999 as in benchmark_body.
+ * If MSG_SIZE or the initialization mechanism of the array change the RESULT
+ * value needs to be updated accordingly. */
+#define RESULT 0x33f673b4
+
+static char heap[HEAP_SIZE];
+
+// leftrotate function definition
+#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))
+
+// These vars will contain the hash
+static uint32_t h0, h1, h2, h3;
+
+/*
+ * Compute the MD5 state words for initial_msg[0 .. initial_len) and leave
+ * them in the file-scope variables h0..h3.
+ *
+ * The message is copied into a zero-filled temporary buffer, padded with a
+ * 0x80 byte and a 32-bit bit count, and processed in 64-byte chunks.  The
+ * sixteen words of each chunk are read in host byte order.
+ *
+ * If the temporary buffer cannot be allocated, h0..h3 are all set to 0 and
+ * nothing is hashed.
+ */
+void md5(uint8_t *initial_msg, size_t initial_len) {
+
+    // Note: All variables are unsigned 32 bit and wrap modulo 2^32 when calculating
+
+    // r specifies the per-round shift amounts
+    uint32_t r[] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
+                    5,  9, 14, 20, 5,  9, 14, 20, 5,  9, 14, 20, 5,  9, 14, 20,
+                    4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
+                    6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};
+
+    // Use binary integer part of the sines of integers (in radians) as constants
+    uint32_t k[] = {
+        0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
+        0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
+        0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
+        0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
+        0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
+        0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
+        0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
+        0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
+        0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
+        0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
+        0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
+        0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+        0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
+        0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
+        0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
+        0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391};
+
+    // Initialize variables:
+    h0 = 0x67452301;
+    h1 = 0xefcdab89;
+    h2 = 0x98badcfe;
+    h3 = 0x10325476;
+
+    // Pre-processing: adding a single 1 bit
+    //append "1" bit to message
+    /* Notice: the input bytes are considered as bits strings,
+       where the first bit is the most significant bit of the byte.[37] */
+
+    // Pre-processing: padding with zeros
+    //append "0" bit until message length in bit ≡ 448 (mod 512)
+    //append length mod (2 pow 64) to message
+
+    // Kept as size_t so the padded length is never narrowed to int.
+    size_t new_len = ((((initial_len + 8) / 64) + 1) * 64) - 8;
+
+    // calloc also appends the "0" bits (we alloc also 64 extra bytes...)
+    uint8_t *msg = calloc(new_len + 64, 1);
+    if (msg == NULL) {
+        // No buffer, no digest: publish an all-zero state instead of
+        // dereferencing a null pointer below.
+        h0 = h1 = h2 = h3 = 0;
+        return;
+    }
+    memcpy(msg, initial_msg, initial_len);
+    msg[initial_len] = 128; // write the "1" bit
+
+    // note, we append the len in bits at the end of the buffer; only the low
+    // 32 bits are stored, the remaining length bytes stay zero from calloc
+    uint32_t bits_len = (uint32_t) (8 * initial_len);
+    memcpy(msg + new_len, &bits_len, sizeof bits_len);
+
+    // Process the message in successive 512-bit chunks:
+    //for each 512-bit chunk of message:
+    for (size_t offset = 0; offset < new_len; offset += (512/8)) {
+
+        // break chunk into sixteen 32-bit words w[j], 0 ≤ j ≤ 15
+        uint32_t *w = (uint32_t *) (msg + offset);
+
+#ifdef DEBUG
+        printf("offset: %d %x\n", (int) offset, (unsigned) offset);
+
+        int j;
+        for(j =0; j < 64; j++) printf("%x ", ((uint8_t *) w)[j]);
+        puts("");
+#endif
+
+        // Initialize hash value for this chunk:
+        uint32_t a = h0;
+        uint32_t b = h1;
+        uint32_t c = h2;
+        uint32_t d = h3;
+
+        // Main loop:
+        uint32_t i;
+        for(i = 0; i<64; i++) {
+
+#ifdef ROUNDS
+            // Dump the four working words byte by byte, in memory order.
+            uint8_t *p;
+            printf("%i: ", i);
+            p=(uint8_t *)&a;
+            printf("%2.2x%2.2x%2.2x%2.2x ", p[0], p[1], p[2], p[3]);
+
+            p=(uint8_t *)&b;
+            printf("%2.2x%2.2x%2.2x%2.2x ", p[0], p[1], p[2], p[3]);
+
+            p=(uint8_t *)&c;
+            printf("%2.2x%2.2x%2.2x%2.2x ", p[0], p[1], p[2], p[3]);
+
+            p=(uint8_t *)&d;
+            printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3]);
+            puts("");
+#endif
+
+            // f is the round's mixing function of b, c, d; g selects which
+            // message word of the chunk is consumed in this step.
+            uint32_t f, g;
+
+            if (i < 16) {
+                f = (b & c) | ((~b) & d);
+                g = i;
+            } else if (i < 32) {
+                f = (d & b) | ((~d) & c);
+                g = (5*i + 1) % 16;
+            } else if (i < 48) {
+                f = b ^ c ^ d;
+                g = (3*i + 5) % 16;
+            } else {
+                f = c ^ (b | (~d));
+                g = (7*i) % 16;
+            }
+
+#ifdef ROUNDS
+            printf("f=%x g=%d w[g]=%x\n", f, g, w[g]);
+#endif
+            // Rotate the working words: (a, b, c, d) <- (d, b + rot, b, c)
+            uint32_t temp = d;
+            d = c;
+            c = b;
+#ifdef DEBUG
+            printf("rotateLeft(%x + %x + %x + %x, %d)\n", a, f, k[i], w[g], r[i]);
+#endif
+            b = b + LEFTROTATE((a + f + k[i] + w[g]), r[i]);
+            a = temp;
+        }
+
+        // Add this chunk's hash to result so far:
+
+        h0 += a;
+        h1 += b;
+        h2 += c;
+        h3 += d;
+    }
+
+    // cleanup
+    free(msg);
+}
+
+
+
+void
+initialise_benchmark (void)
+{ /* Deliberately empty: benchmark_body () allocates and fills its message
+     buffer on every repetition, so there is no one-off state to set up. */
+}
+
+
+static int benchmark_body (int rpt, int len);
+
+/* Warm-up hook: run the benchmark kernel `heat` times over an MSG_SIZE-byte
+   message.  The computed value is discarded. */
+void warm_caches (int heat)
+{
+  (void) benchmark_body (heat, MSG_SIZE);
+}
+
+
+/* Measured entry point: run the kernel LOCAL_SCALE_FACTOR * CPU_MHZ times
+   over an MSG_SIZE-byte message and hand back the value it produces. */
+int benchmark (void)
+{
+  const int repeats = LOCAL_SCALE_FACTOR * CPU_MHZ;
+
+  return benchmark_body (repeats, MSG_SIZE);
+}
+
+/*
+ * Benchmark kernel: rpt times, allocate a len-byte message whose byte i is
+ * the low 8 bits of i, hash it with md5 () and release it again.
+ *
+ * Returns h0 ^ h1 ^ h2 ^ h3 as left behind by the last md5 () call.  If the
+ * message buffer cannot be allocated the function stops early and returns 0.
+ */
+static int __attribute__ ((noinline))
+benchmark_body (int rpt, int len)
+{
+  int i, j;
+
+  for (j = 0; j < rpt; j++) {
+
+    uint8_t *msg = malloc(len);
+    if (msg == NULL) {
+      /* Do not write through a null pointer; report a value that is not a
+         digest instead. */
+      return 0;
+    }
+    for (i = 0; i < len; i++){
+      msg[i] = (uint8_t) i;   /* wraps every 256 bytes by design */
+    }
+    md5(msg, len);
+    free(msg);
+
+    // display result
+#ifdef DEBUG
+    uint8_t *p;
+
+    p=(uint8_t *)&h0;
+    printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3]);
+
+    p=(uint8_t *)&h1;
+    printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3]);
+
+    p=(uint8_t *)&h2;
+    printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3]);
+
+    p=(uint8_t *)&h3;
+    printf("%2.2x%2.2x%2.2x%2.2x\n", p[0], p[1], p[2], p[3]);
+#endif
+  }
+
+  return h0 ^ h1 ^ h2 ^ h3;
+}
+
+
+/* Compare the value returned by benchmark () with the reference RESULT.
+   Yields 1 on a match and 0 otherwise. */
+int verify_benchmark (int r)
+{
+  // This isn't a proper check...
+  if (r != RESULT)
+    return 0;
+
+  return 1;
+}
--- a/benchies/embench/src/main.c
+++ b/benchies/embench/src/main.c
@ -0,0 +1,47 @@
+/* Common main.c for the benchmarks
+
+   Copyright (C) 2014 Embecosm Limited and University of Bristol
+   Copyright (C) 2018-2019 Embecosm Limited
+
+   Contributor: James Pallister <james.pallister@bristol.ac.uk>
+   Contributor: Jeremy Bennett <jeremy.bennett@embecosm.com>
+
+   This file is part of Embench and was formerly part of the Bristol/Embecosm
+   Embedded Benchmark Suite.
+
+   SPDX-License-Identifier: GPL-3.0-or-later */
+
+#include "support.h"
+#include <stdio.h>
+
+/* Benchmark driver: initialise, warm the caches WARMUP_HEAT times, run the
+   benchmark once and verify its result.  Progress is reported with puts ().
+
+   Returns 0 when verify_benchmark () accepts the result and 1 otherwise.  */
+int __attribute__((used)) main(int argc __attribute__((unused)),
+			       char *argv[] __attribute__((unused)))
+{
+	/* volatile keeps the benchmark result (and thus the call producing
+	   it) from being optimised away */
+	volatile int result;
+	int correct;
+
+	puts("Initialising benchmark");
+	initialise_benchmark();
+	puts("Warming up caches");
+	warm_caches(WARMUP_HEAT);
+
+	puts("Running benchmark");
+	result = benchmark();
+	puts("finished benchmark");
+
+	/* bmarks that use arrays will check a global array rather than int result */
+
+	correct = verify_benchmark(result);
+
+	puts("returning");
+	return (!correct);
+
+} /* main () */
+
+/*
+   Local Variables:
+   mode: C
+   c-file-style: "gnu"
+   End:
+*/
--- a/benchies/embench/src/support.h
+++ b/benchies/embench/src/support.h
@ -0,0 +1,69 @@
+/* Support header for BEEBS.
+
+   Copyright (C) 2014 Embecosm Limited and the University of Bristol
+   Copyright (C) 2019 Embecosm Limited
+
+   Contributor James Pallister <james.pallister@bristol.ac.uk>
+
+   Contributor Jeremy Bennett <jeremy.bennett@embecosm.com>
+
+   This file is part of Embench and was formerly part of the Bristol/Embecosm
+   Embedded Benchmark Suite.
+
+   SPDX-License-Identifier: GPL-3.0-or-later */
+
+#ifndef SUPPORT_H
+#define SUPPORT_H
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+//#define DEBUG
+#define CPU_MHZ 1 /* factor in the repetition count computed by benchmark () */
+#define WARMUP_HEAT 1000 /* argument main () passes to warm_caches () */
+
+/* Include board support header if we have one
+   (this copy of the header does not include any) */
+
+/* Benchmarks must implement verify_benchmark, which must return -1 if no
+   verification is done. */
+
+int verify_benchmark (int result);
+
+/* Standard functions implemented for each board
+   NOTE(review): no definition or caller of these three is visible in this
+   benchmark's main.c or bench.c -- confirm whether they are still needed. */
+
+void initialise_board (void);
+void start_trigger (void);
+void stop_trigger (void);
+
+/* Every benchmark implements this for one-off data initialization.  This is
+   only used for initialization that is independent of how often benchmark ()
+   is called. */
+
+void initialise_benchmark (void);
+
+/* Every benchmark implements this for cache warm up, typically calling
+   benchmark several times. The argument controls how much warming up is
+   done, with 0 meaning no warming. */
+
+void warm_caches (int temperature);
+
+/* Every benchmark implements this as its entry point. Don't allow it to be
+   inlined! */
+
+int benchmark (void) __attribute__ ((noinline));
+
+/* Every benchmark must implement this to validate the result of the
+   benchmark.
+   NOTE(review): this repeats the verify_benchmark declaration made earlier in
+   this header; the two are compatible, so the repetition is redundant. */
+
+int verify_benchmark (int res);
+
+
+#endif /* SUPPORT_H */
+
+/*
+   Local Variables:
+   mode: C
+   c-file-style: "gnu"
+   End:
+*/
--- a/benchies/linpack/CMakeLists.txt
+++ b/benchies/linpack/CMakeLists.txt
@ -0,0 +1,7 @@
+# SPDX-License-Identifier: Apache-2.0
+
+cmake_minimum_required(VERSION 3.20.0)
+find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})
+project(blinky)
+
+target_sources(app PRIVATE src/main.c)
--- a/benchies/linpack/README.rst
+++ b/benchies/linpack/README.rst
@ -0,0 +1,97 @@
+.. zephyr:code-sample:: blinky
+   :name: Blinky
+   :relevant-api: gpio_interface
+
+   Blink an LED forever using the GPIO API.
+
+Overview
+********
+
+The Blinky sample blinks an LED forever using the :ref:`GPIO API <gpio_api>`.
+
+The source code shows how to:
+
+#. Get a pin specification from the :ref:`devicetree <dt-guide>` as a
+   :c:struct:`gpio_dt_spec`
+#. Configure the GPIO pin as an output
+#. Toggle the pin forever
+
+See :zephyr:code-sample:`pwm-blinky` for a similar sample that uses the PWM API instead.
+
+.. _blinky-sample-requirements:
+
+Requirements
+************
+
+Your board must:
+
+#. Have an LED connected via a GPIO pin (these are called "User LEDs" on many of
+   Zephyr's :ref:`boards`).
+#. Have the LED configured using the ``led0`` devicetree alias.
+
+Building and Running
+********************
+
+Build and flash Blinky as follows, changing ``reel_board`` for your board:
+
+.. zephyr-app-commands::
+   :zephyr-app: samples/basic/blinky
+   :board: reel_board
+   :goals: build flash
+   :compact:
+
+After flashing, the LED starts to blink and messages with the current LED state
+are printed on the console. If a runtime error occurs, the sample exits without
+printing to the console.
+
+Build errors
+************
+
+You will see a build error at the source code line defining the ``struct
+gpio_dt_spec led`` variable if you try to build Blinky for an unsupported
+board.
+
+On GCC-based toolchains, the error looks like this:
+
+.. code-block:: none
+
+   error: '__device_dts_ord_DT_N_ALIAS_led_P_gpios_IDX_0_PH_ORD' undeclared here (not in a function)
+
+Adding board support
+********************
+
+To add support for your board, add something like this to your devicetree:
+
+.. code-block:: DTS
+
+   / {
+   	aliases {
+   		led0 = &myled0;
+   	};
+
+   	leds {
+   		compatible = "gpio-leds";
+   		myled0: led_0 {
+   			gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
+                };
+   	};
+   };
+
+The above sets your board's ``led0`` alias to use pin 13 on GPIO controller
+``gpio0``. The pin flags :c:macro:`GPIO_ACTIVE_LOW` mean the LED is on when
+the pin is set to its low state, and off when the pin is in its high state.
+
+Tips:
+
+- See :dtcompatible:`gpio-leds` for more information on defining GPIO-based LEDs
+  in devicetree.
+
+- If you're not sure what to do, check the devicetrees for supported boards which
+  use the same SoC as your target. See :ref:`get-devicetree-outputs` for details.
+
+- See :zephyr_file:`include/zephyr/dt-bindings/gpio/gpio.h` for the flags you can use
+  in devicetree.
+
+- If the LED is built in to your board hardware, the alias should be defined in
+  your :ref:`BOARD.dts file <devicetree-in-out-files>`. Otherwise, you can
+  define one in a :ref:`devicetree overlay <set-devicetree-overlays>`.
--- a/benchies/linpack/prj.conf
+++ b/benchies/linpack/prj.conf
@ -0,0 +1,9 @@
+CONFIG_GPIO=y
+#CONFIG_ASAN=y
+#CONFIG_CFI=y
+CONFIG_LLVM_USE_LLD=y
+#CONFIG_LTO=y
+
+#CONFIG_DEBUG=y
+#CONFIG_DEBUG_INFO=y
+#CONFIG_DEBUG_OPTIMIZATIONS=y
--- a/benchies/linpack/sample.yaml
+++ b/benchies/linpack/sample.yaml
@ -0,0 +1,12 @@
+sample:
+  name: Blinky Sample
+tests:
+  sample.basic.blinky:
+    tags:
+      - LED
+      - gpio
+    filter: dt_enabled_alias_with_parent_compat("led0", "gpio-leds")
+    depends_on: gpio
+    harness: led
+    integration_platforms:
+      - frdm_k64f
--- a/benchies/linpack/src/main.c
+++ b/benchies/linpack/src/main.c
@ -0,0 +1,907 @@
+/*
+**
+** LINPACK.C        Linpack benchmark, calculates FLOPS.
+**                  (FLoating Point Operations Per Second)
+**
+** Translated to C by Bonnie Toy 5/88
+**
+** Modified by Will Menninger, 10/93, with these features:
+**  (modified on 2/25/94  to fix a problem with daxpy  for
+**   unequal increments or equal increments not equal to 1.
+**     Jack Dongarra)
+**
+** - Defaults to double precision.
+** - Averages ROLLed and UNROLLed performance.
+** - User selectable array sizes.
+** - Automatically does enough repetitions to take at least 10 CPU seconds.
+** - Prints machine precision.
+** - ANSI prototyping.
+**
+** Modified by ict@nfinit.systems, 12/18, with these features:
+**
+** - Improved double precision defaulting to allow -DSP to work again
+** - Can now take the array size as an argument for automation purposes
+** - Main function return type changed to integer for automation purposes
+** - Re-organized output for cleaner reports
+**
+** To compile:  cc -O -o linpack linpack.c -lm
+**
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+#include <float.h>
+
+#ifndef SP
+#ifndef DP
+#define DP
+#endif
+#endif
+
+#ifdef SP
+#define ZERO        0.0
+#define ONE         1.0
+#define PREC        "Single"
+#define BASE10DIG   FLT_DIG
+
+typedef float   REAL;
+#endif
+
+#ifdef DP
+#define ZERO        0.0e0
+#define ONE         1.0e0
+#define PREC        "Double"
+#define BASE10DIG   DBL_DIG
+
+typedef double  REAL;
+#endif
+
+/* 2022-07-26: Macro defined for memreq variable to resolve warnings
+ *             during malloc check
+ */                                    
+#define MEM_T long
+
+static REAL linpack  (long nreps,int arsize);
+static void matgen   (REAL *a,int lda,int n,REAL *b,REAL *norma);
+static void dgefa    (REAL *a,int lda,int n,int *ipvt,int *info,int roll);
+static void dgesl    (REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll);
+static void daxpy_r  (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
+static REAL ddot_r   (int n,REAL *dx,int incx,REAL *dy,int incy);
+static void dscal_r  (int n,REAL da,REAL *dx,int incx);
+static void daxpy_ur (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
+static REAL ddot_ur  (int n,REAL *dx,int incx,REAL *dy,int incy);
+static void dscal_ur (int n,REAL da,REAL *dx,int incx);
+static int  idamax   (int n,REAL *dx,int incx);
+static REAL second   (void);
+
+static void *mempool;
+
+
+/*
+** Benchmark driver.
+**
+** Without a command-line argument the array size is read from stdin in a
+** loop ("q" or "Q" quits, an empty line selects 100, end of input quits).
+** With an argument, argv[1] supplies the size and a single pass is made.
+**
+** The size is rounded down to an even number and must be at least 10.  The
+** work area is allocated into mempool and linpack() is called with a
+** doubling repetition count until it returns at least 10.
+**
+** Always returns 0.
+*/
+int main(int argc, char **argv)
+
+    {
+    char    buf[80] = "";   /* stays empty when the size comes from argv */
+    int     arsize;
+    long    arsize2d,nreps;
+    size_t  malloc_arg;
+    MEM_T   memreq;
+
+    while (1)
+        {
+        if (argc < 2)
+            {
+            printf("Enter array size (q to quit) [100]:  ");
+            /* stop on end of input or a read error instead of parsing
+               whatever the buffer happened to hold, forever */
+            if (fgets(buf,sizeof buf,stdin)==NULL)
+                break;
+            }
+        if (buf[0]=='q' || buf[0]=='Q')
+            break;
+        if (buf[0]=='\0' || buf[0]=='\n')
+            arsize=100;
+        else
+            arsize=atoi(buf);
+        if (argc > 1)
+            arsize=atoi(argv[1]);
+
+        /* round down to an even size */
+        arsize/=2;
+        arsize*=2;
+        if (arsize<10)
+            {
+            printf("Too small.\n");
+            if (argc > 1) break;
+            continue;
+            }
+
+        /* one arsize x arsize matrix, one REAL vector and one int vector */
+        arsize2d = (long)arsize*(long)arsize;
+        memreq=arsize2d*sizeof(REAL)+(long)arsize*sizeof(REAL)+(long)arsize*sizeof(int);
+        malloc_arg=(size_t)memreq;
+
+        /* the round trip through size_t catches a request that does not fit */
+        if ((MEM_T)malloc_arg!=memreq || (mempool=malloc(malloc_arg))==NULL)
+            {
+            printf("Not enough memory available for given array size.\n");
+            if (argc > 1) break;
+            continue;
+            }
+        printf("LINPACK benchmark, %s precision.\n",PREC);
+        printf("Machine precision:  %d digits.\n",BASE10DIG);
+        printf("Array size %d X %d.\n",arsize,arsize);
+        printf("Memory required:  %ldK.\n",(memreq+512L)>>10);
+        printf("Average rolled and unrolled performance:\n\n");
+        printf("    Reps Time(s) DGEFA   DGESL  OVERHEAD    KFLOPS\n");
+        printf("----------------------------------------------------\n");
+
+        /* double the repetition count until one run is long enough */
+        nreps=1;
+        while (linpack(nreps,arsize)<10.)
+            nreps*=2;
+        free(mempool);
+        printf("\n");
+        if (argc > 1) break;
+        }
+    return 0;
+    }
+
+
+/*
+** Time nreps factor/solve cycles using the rolled routines, then nreps
+** cycles using the unrolled ones, on the system of order arsize/2 laid out
+** in mempool with leading dimension arsize.
+**
+** All times come from second().  When the total is below 0.5 or the
+** combined dgefa+dgesl time is below 0.2, nothing is printed and 0 is
+** returned.  Otherwise one report line is printed and the total time is
+** returned.
+*/
+static REAL linpack(long nreps,int arsize)
+
+    {
+    REAL  *matrix,*rhs;
+    REAL   norma,mark,began,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
+    int   *pivots,order,info,lda,roll;
+    long   rep,cells;
+
+    lda = arsize;
+    order = arsize/2;
+    cells = (long)arsize*(long)arsize;
+    ops = ((2.0*order*order*order)/3.0+2.0*order*order);
+
+    /* carve the work area: matrix, then right-hand side, then pivots */
+    matrix = (REAL *)mempool;
+    rhs = matrix+cells;
+    pivots = (int *)(rhs+arsize);
+
+    tdgefa = 0;
+    tdgesl = 0;
+    began = second();
+
+    /* first pass: roll = 1 (rolled); second pass: roll = 0 (unrolled) */
+    for (roll=1;roll>=0;roll--)
+        for (rep=0;rep<nreps;rep++)
+            {
+            matgen(matrix,lda,order,rhs,&norma);
+            mark = second();
+            dgefa(matrix,lda,order,pivots,&info,roll);
+            tdgefa += second()-mark;
+            mark = second();
+            dgesl(matrix,lda,order,pivots,rhs,0,roll);
+            tdgesl += second()-mark;
+            }
+    totalt = second()-began;
+
+    /* too short to report */
+    if (totalt<0.5 || tdgefa+tdgesl<0.2)
+        return(0.);
+
+    /* rate and overhead are derived before the parts are clamped */
+    kflops = 2.*nreps*ops/(1000.*(tdgefa+tdgesl));
+    toverhead = totalt-tdgefa-tdgesl;
+    if (tdgefa<0.)
+        tdgefa = 0.;
+    if (tdgesl<0.)
+        tdgesl = 0.;
+    if (toverhead<0.)
+        toverhead = 0.;
+
+    printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%%  %9.3f\n",
+            nreps,totalt,100.*tdgefa/totalt,
+            100.*tdgesl/totalt,100.*toverhead/totalt,
+            kflops);
+    return(totalt);
+    }
+
+
+/*
+** matgen builds the test system.
+**
+** The matrix cannot be declared a[][lda] in C, so element [j][i] is
+** addressed as a[lda*j+i].  The leading n-by-n part of a is filled from a
+** fixed integer sequence, *norma receives the largest value stored (never
+** less than 0.0), and b[i] receives the sum over j of a[lda*j+i].
+*/
+static void matgen(REAL *a,int lda,int n,REAL *b,REAL *norma)
+
+    {
+    int   seed,row,col;
+    REAL *cell;
+
+    seed = 1325;
+    *norma = 0.0;
+    for (row = 0; row < n; row++)
+        {
+        cell = &a[lda*row];
+        for (col = 0; col < n; col++, cell++)
+            {
+            seed = (int)((long)3125*(long)seed % 65536L);
+            *cell = (seed - 32768.0)/16384.0;
+            if (*cell > *norma)
+                *norma = *cell;
+            }
+        }
+
+    for (col = 0; col < n; col++)
+        b[col] = 0.0;
+
+    for (row = 0; row < n; row++)
+        for (col = 0; col < n; col++)
+            b[col] = b[col] + a[lda*row+col];
+    }
+
+
+/*
+**
+** DGEFA benchmark
+**
+** We would like to declare a[][lda], but c does not allow it.  In this
+** function, references to a[i][j] are written a[lda*i+j].
+**
+**   dgefa factors a double precision matrix by gaussian elimination.
+**
+**   dgefa is usually called by dgeco, but it can be called
+**   directly with a saving in time if  rcond  is not needed.
+**   (time for dgeco) = (1 + 9/n)*(time for dgefa) .
+**
+**   on entry
+**
+**      a       REAL precision[n][lda]
+**              the matrix to be factored.
+**
+**      lda     integer
+**              the leading dimension of the array  a .
+**
+**      n       integer
+**              the order of the matrix  a .
+**
+**   on return
+**
+**      a       an upper triangular matrix and the multipliers
+**              which were used to obtain it.
+**              the factorization can be written  a = l*u  where
+**              l  is a product of permutation and unit lower
+**              triangular matrices and  u  is upper triangular.
+**
+**      ipvt    integer[n]
+**              an integer vector of pivot indices.
+**
+**      info    integer
+**              = 0  normal value.
+**              = k  if  u[k][k] .eq. 0.0 .  this is not an error
+**                   condition for this subroutine, but it does
+**                   indicate that dgesl or dgedi will divide by zero
+**                   if called.  use  rcond  in dgeco for a reliable
+**                   indication of singularity.
+**
+**   linpack. this version dated 08/14/78 .
+**   cleve moler, university of New Mexico, argonne national lab.
+**
+**   functions
+**
+**   blas daxpy,dscal,idamax
+**
+*/
+static void dgefa(REAL *a,int lda,int n,int *ipvt,int *info,int roll)
+
+    {
+    REAL t;
+    int j,k,kp1,l,nm1;
+
+    /* gaussian elimination with partial pivoting
+
+       a nonzero roll selects the dscal_r/daxpy_r helpers, a zero roll the
+       dscal_ur/daxpy_ur helpers; apart from that the two branches below
+       are statement-for-statement the same. */
+
+    if (roll)
+        {
+        *info = 0;
+        nm1 = n - 1;
+        if (nm1 >=  0)
+            for (k = 0; k < nm1; k++)
+                {
+                kp1 = k + 1;
+
+                /* find l = pivot index
+                   (idamax is handed the n-k entries starting at a[lda*k+k];
+                   adding k rebases its result to an index within the whole
+                   line -- presumably the entry of largest magnitude, confirm
+                   against idamax) */
+
+                l = idamax(n-k,&a[lda*k+k],1) + k;
+                ipvt[k] = l;
+
+                /* zero pivot implies this column already
+                   triangularized */
+
+                if (a[lda*k+l] != ZERO)
+                    {
+
+                    /* interchange if necessary */
+
+                    if (l != k)
+                        {
+                        t = a[lda*k+l];
+                        a[lda*k+l] = a[lda*k+k];
+                        a[lda*k+k] = t;
+                        }
+
+                    /* compute multipliers
+                       (the n-(k+1) entries after the pivot are passed to
+                       dscal_r together with -1/pivot) */
+
+                    t = -ONE/a[lda*k+k];
+                    dscal_r(n-(k+1),t,&a[lda*k+k+1],1);
+
+                    /* row elimination with column indexing
+                       (each later line j gets the same l<->k interchange and
+                       is then updated through daxpy_r with t = its entry
+                       at l) */
+
+                    for (j = kp1; j < n; j++)
+                        {
+                        t = a[lda*j+l];
+                        if (l != k)
+                            {
+                            a[lda*j+l] = a[lda*j+k];
+                            a[lda*j+k] = t;
+                            }
+                        daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
+                        }
+                    }
+                else
+                    (*info) = k; /* remember where a zero pivot was met */
+                }
+        ipvt[n-1] = n-1; /* the last step has nothing left to swap with */
+        if (a[lda*(n-1)+(n-1)] == ZERO)
+            (*info) = n-1;
+        }
+    else
+        {
+        *info = 0;
+        nm1 = n - 1;
+        if (nm1 >=  0)
+            for (k = 0; k < nm1; k++)
+                {
+                kp1 = k + 1;
+
+                /* find l = pivot index */
+
+                l = idamax(n-k,&a[lda*k+k],1) + k;
+                ipvt[k] = l;
+
+                /* zero pivot implies this column already
+                   triangularized */
+
+                if (a[lda*k+l] != ZERO)
+                    {
+
+                    /* interchange if necessary */
+
+                    if (l != k)
+                        {
+                        t = a[lda*k+l];
+                        a[lda*k+l] = a[lda*k+k];
+                        a[lda*k+k] = t;
+                        }
+
+                    /* compute multipliers
+                       (same scaling as above, through dscal_ur) */
+
+                    t = -ONE/a[lda*k+k];
+                    dscal_ur(n-(k+1),t,&a[lda*k+k+1],1);
+
+                    /* row elimination with column indexing
+                       (same update as above, through daxpy_ur) */
+
+                    for (j = kp1; j < n; j++)
+                        {
+                        t = a[lda*j+l];
+                        if (l != k)
+                            {
+                            a[lda*j+l] = a[lda*j+k];
+                            a[lda*j+k] = t;
+                            }
+                        daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
+                        }
+                    }
+                else
+                    (*info) = k; /* remember where a zero pivot was met */
+                }
+        ipvt[n-1] = n-1; /* the last step has nothing left to swap with */
+        if (a[lda*(n-1)+(n-1)] == ZERO)
+            (*info) = n-1;
+        }
+    }
+
+
+/*
+**
+** DGESL benchmark
+**
+** We would like to declare a[][lda], but c does not allow it.  In this
+** function, references to a[i][j] are written a[lda*i+j].
+**
+**   dgesl solves the double precision system
+**   a * x = b  or  trans(a) * x = b
+**   using the factors computed by dgeco or dgefa.
+**
+**   on entry
+**
+**      a       double precision[n][lda]
+**              the output from dgeco or dgefa.
+**
+**      lda     integer
+**              the leading dimension of the array  a .
+**
+**      n       integer
+**              the order of the matrix  a .
+**
+**      ipvt    integer[n]
+**              the pivot vector from dgeco or dgefa.
+**
+**      b       double precision[n]
+**              the right hand side vector.
+**
+**      job     integer
+**              = 0         to solve  a*x = b ,
+**              = nonzero   to solve  trans(a)*x = b  where
+**                          trans(a)  is the transpose.
+**
+**  on return
+**
+**      b       the solution vector  x .
+**
+**   error condition
+**
+**      a division by zero will occur if the input factor contains a
+**      zero on the diagonal.  technically this indicates singularity
+**      but it is often caused by improper arguments or improper
+**      setting of lda .  it will not occur if the subroutines are
+**      called correctly and if dgeco has set rcond .gt. 0.0
+**      or dgefa has set info .eq. 0 .
+**
+**   to compute  inverse(a) * c  where  c  is a matrix
+**   with  p  columns
+**         dgeco(a,lda,n,ipvt,rcond,z)
+**         if (!rcond is too small){
+**              for (j=0,j<p,j++)
+**                      dgesl(a,lda,n,ipvt,c[j][0],0);
+**         }
+**
+**   linpack. this version dated 08/14/78 .
+**   cleve moler, university of new mexico, argonne national lab.
+**
+**   functions
+**
+**   blas daxpy,ddot
+*/
+static void dgesl(REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll)
+
+    {
+    /*
+    ** Solves  a*x = b  (job == 0) or  trans(a)*x = b  (job != 0) in
+    ** place in b, using the factors stored in a and the pivot indices
+    ** in ipvt.
+    **
+    ** roll != 0 uses the rolled blas (daxpy_r, ddot_r); roll == 0 uses
+    ** the unrolled ones (daxpy_ur, ddot_ur).  Apart from that choice
+    ** the two halves below perform the same steps.
+    */
+    REAL    t;
+    int     k,kb,l,nm1;
+
+    nm1 = n - 1;
+    if (roll)
+        {
+        if (job == 0)
+            {
+
+            /* job = 0 , solve  a * x = b   */
+            /* first solve  l*y = b         */
+
+            for (k = 0; k < nm1; k++)
+                {
+                l = ipvt[k];
+                t = b[l];
+                if (l != k)
+                    {
+                    b[l] = b[k];
+                    b[k] = t;
+                    }
+                daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
+                }
+
+            /* now solve  u*x = y */
+
+            for (kb = 0; kb < n; kb++)
+                {
+                k = n - (kb + 1);
+                b[k] = b[k]/a[lda*k+k];
+                t = -b[k];
+                daxpy_r(k,t,&a[lda*k+0],1,&b[0],1);
+                }
+            }
+        else
+            {
+
+            /* job = nonzero, solve  trans(a) * x = b  */
+            /* first solve  trans(u)*y = b             */
+
+            for (k = 0; k < n; k++)
+                {
+                t = ddot_r(k,&a[lda*k+0],1,&b[0],1);
+                b[k] = (b[k] - t)/a[lda*k+k];
+                }
+
+            /* now solve trans(l)*x = y                          */
+            /* kb runs up to and including nm1 so that k counts  */
+            /* down from n-2 to 0; stopping one step earlier     */
+            /* would leave b[0] without its update and swap.     */
+
+            for (kb = 1; kb <= nm1; kb++)
+                {
+                k = n - (kb+1);
+                b[k] = b[k] + ddot_r(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
+                l = ipvt[k];
+                if (l != k)
+                    {
+                    t = b[l];
+                    b[l] = b[k];
+                    b[k] = t;
+                    }
+                }
+            }
+        }
+    else
+        {
+        if (job == 0)
+            {
+
+            /* job = 0 , solve  a * x = b   */
+            /* first solve  l*y = b         */
+
+            for (k = 0; k < nm1; k++)
+                {
+                l = ipvt[k];
+                t = b[l];
+                if (l != k)
+                    {
+                    b[l] = b[k];
+                    b[k] = t;
+                    }
+                daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
+                }
+
+            /* now solve  u*x = y */
+
+            for (kb = 0; kb < n; kb++)
+                {
+                k = n - (kb + 1);
+                b[k] = b[k]/a[lda*k+k];
+                t = -b[k];
+                daxpy_ur(k,t,&a[lda*k+0],1,&b[0],1);
+                }
+            }
+        else
+            {
+
+            /* job = nonzero, solve  trans(a) * x = b  */
+            /* first solve  trans(u)*y = b             */
+
+            for (k = 0; k < n; k++)
+                {
+                t = ddot_ur(k,&a[lda*k+0],1,&b[0],1);
+                b[k] = (b[k] - t)/a[lda*k+k];
+                }
+
+            /* now solve trans(l)*x = y                          */
+            /* same inclusive bound as the rolled half: k must   */
+            /* reach 0.                                          */
+
+            for (kb = 1; kb <= nm1; kb++)
+                {
+                k = n - (kb+1);
+                b[k] = b[k] + ddot_ur(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
+                l = ipvt[k];
+                if (l != k)
+                    {
+                    t = b[l];
+                    b[l] = b[k];
+                    b[k] = t;
+                    }
+                }
+            }
+        }
+    }
+
+
+
+/*
+** Constant times a vector plus a vector:  dy = dy + da*dx  over n elements.
+** Jack Dongarra, linpack, 3/11/78.
+** ROLLED version
+**
+**      n       number of elements to process; n <= 0 does nothing.
+**      da      scalar multiplier; da == ZERO does nothing.
+**      dx,incx source vector and its stride.
+**      dy,incy destination vector (updated in place) and its stride.
+*/
+static void daxpy_r(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)
+
+    {
+    int i,ix,iy;
+
+    if (n <= 0)
+        return;
+    if (da == ZERO)
+        return;
+
+    if (incx != 1 || incy != 1)
+        {
+
+        /* code for unequal increments or equal increments != 1 */
+        /* indices are 0-based: start at element 0, or at the   */
+        /* far end of the vector when the stride is negative.   */
+
+        ix = 0;
+        iy = 0;
+        if (incx < 0) ix = (-n+1)*incx;
+        if (incy < 0) iy = (-n+1)*incy;
+        for (i = 0;i < n; i++)
+            {
+            dy[iy] = dy[iy] + da*dx[ix];
+            ix = ix + incx;
+            iy = iy + incy;
+            }
+        return;
+        }
+
+    /* code for both increments equal to 1 */
+
+    for (i = 0;i < n; i++)
+        dy[i] = dy[i] + da*dx[i];
+    }
+
+
+/*
+** Dot product of two vectors: the sum of dx[.]*dy[.] over n elements,
+** stepping through dx by incx and through dy by incy.
+** Jack Dongarra, linpack, 3/11/78.
+** ROLLED version
+*/
+static REAL ddot_r(int n,REAL *dx,int incx,REAL *dy,int incy)
+
+    {
+    REAL sum;
+    int count,px,py;
+
+    sum = ZERO;
+
+    if (n <= 0)
+        return(ZERO);
+
+    if (incx == 1 && incy == 1)
+        {
+
+        /* both vectors contiguous */
+
+        for (count = 0; count < n; count++)
+            sum = sum + dx[count]*dy[count];
+        return(sum);
+        }
+
+    /* general strides; a negative stride starts from the far end */
+
+    px = (incx < 0) ? (-n+1)*incx : 0;
+    py = (incy < 0) ? (-n+1)*incy : 0;
+    for (count = 0; count < n; count++)
+        {
+        sum = sum + dx[px]*dy[py];
+        px = px + incx;
+        py = py + incy;
+        }
+    return(sum);
+    }
+
+
+/*
+** Multiplies n elements of dx, taken with stride incx, by the scalar da.
+** Jack Dongarra, linpack, 3/11/78.
+** ROLLED version
+*/
+static void dscal_r(int n,REAL da,REAL *dx,int incx)
+
+    {
+    int pos,limit;
+
+    if (n <= 0)
+        return;
+
+    if (incx == 1)
+        {
+
+        /* contiguous vector */
+
+        for (pos = 0; pos < n; pos++)
+            dx[pos] = da*dx[pos];
+        return;
+        }
+
+    /* strided vector: visit indices 0, incx, 2*incx, ... below n*incx */
+
+    limit = n*incx;
+    pos = 0;
+    while (pos < limit)
+        {
+        dx[pos] = da*dx[pos];
+        pos = pos + incx;
+        }
+    }
+
+
+/*
+** Constant times a vector plus a vector:  dy = dy + da*dx  over n elements.
+** Jack Dongarra, linpack, 3/11/78.
+** UNROLLED version
+**
+**      n       number of elements to process; n <= 0 does nothing.
+**      da      scalar multiplier; da == ZERO does nothing.
+**      dx,incx source vector and its stride.
+**      dy,incy destination vector (updated in place) and its stride.
+*/
+static void daxpy_ur(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)
+
+    {
+    int i,ix,iy,m;
+
+    if (n <= 0)
+        return;
+    if (da == ZERO)
+        return;
+
+    if (incx != 1 || incy != 1)
+        {
+
+        /* code for unequal increments or equal increments != 1 */
+        /* indices are 0-based: start at element 0, or at the   */
+        /* far end of the vector when the stride is negative.   */
+
+        ix = 0;
+        iy = 0;
+        if (incx < 0) ix = (-n+1)*incx;
+        if (incy < 0) iy = (-n+1)*incy;
+        for (i = 0;i < n; i++)
+            {
+            dy[iy] = dy[iy] + da*dx[ix];
+            ix = ix + incx;
+            iy = iy + incy;
+            }
+        return;
+        }
+
+    /* code for both increments equal to 1 */
+    /* handle the n % 4 leading elements one by one, then the */
+    /* remainder in groups of four.                           */
+
+    m = n % 4;
+    if ( m != 0)
+        {
+        for (i = 0; i < m; i++)
+            dy[i] = dy[i] + da*dx[i];
+        if (n < 4)
+            return;
+        }
+    for (i = m; i < n; i = i + 4)
+        {
+        dy[i] = dy[i] + da*dx[i];
+        dy[i+1] = dy[i+1] + da*dx[i+1];
+        dy[i+2] = dy[i+2] + da*dx[i+2];
+        dy[i+3] = dy[i+3] + da*dx[i+3];
+        }
+    }
+
+
+/*
+** Dot product of two vectors: the sum of dx[.]*dy[.] over n elements,
+** stepping through dx by incx and through dy by incy.
+** Jack Dongarra, linpack, 3/11/78.
+** UNROLLED version
+*/
+static REAL ddot_ur(int n,REAL *dx,int incx,REAL *dy,int incy)
+
+    {
+    REAL sum;
+    int j,px,py,rem;
+
+    sum = ZERO;
+
+    if (n <= 0)
+        return(ZERO);
+
+    if (incx == 1 && incy == 1)
+        {
+
+        /* contiguous vectors: the n % 5 leading elements one at a */
+        /* time, then the rest five per iteration.  When n < 5 the */
+        /* second loop has nothing left to do.                     */
+
+        rem = n % 5;
+        for (j = 0; j < rem; j++)
+            sum = sum + dx[j]*dy[j];
+        for (j = rem; j < n; j = j + 5)
+            {
+            sum = sum + dx[j]*dy[j] +
+            dx[j+1]*dy[j+1] + dx[j+2]*dy[j+2] +
+            dx[j+3]*dy[j+3] + dx[j+4]*dy[j+4];
+            }
+        return(sum);
+        }
+
+    /* general strides; a negative stride starts from the far end */
+
+    px = (incx < 0) ? (-n+1)*incx : 0;
+    py = (incy < 0) ? (-n+1)*incy : 0;
+    for (j = 0; j < n; j++)
+        {
+        sum = sum + dx[px]*dy[py];
+        px = px + incx;
+        py = py + incy;
+        }
+    return(sum);
+    }
+
+
+/*
+** Multiplies n elements of dx, taken with stride incx, by the scalar da.
+** Jack Dongarra, linpack, 3/11/78.
+** UNROLLED version
+*/
+static void dscal_ur(int n,REAL da,REAL *dx,int incx)
+
+    {
+    int pos,rem,limit;
+
+    if (n <= 0)
+        return;
+
+    if (incx == 1)
+        {
+
+        /* contiguous vector: the n % 5 leading elements one at a */
+        /* time, then the rest five per iteration.  When n < 5    */
+        /* the second loop has nothing left to do.                */
+
+        rem = n % 5;
+        for (pos = 0; pos < rem; pos++)
+            dx[pos] = da*dx[pos];
+        for (pos = rem; pos < n; pos = pos + 5)
+            {
+            dx[pos] = da*dx[pos];
+            dx[pos+1] = da*dx[pos+1];
+            dx[pos+2] = da*dx[pos+2];
+            dx[pos+3] = da*dx[pos+3];
+            dx[pos+4] = da*dx[pos+4];
+            }
+        return;
+        }
+
+    /* strided vector: visit indices 0, incx, 2*incx, ... below n*incx */
+
+    limit = n*incx;
+    pos = 0;
+    while (pos < limit)
+        {
+        dx[pos] = da*dx[pos];
+        pos = pos + incx;
+        }
+    }
+
+
+/*
+** Finds the index of element having max. absolute value.
+** Jack Dongarra, linpack, 3/11/78.
+**
+**      n       number of elements to examine.
+**      dx,incx vector and its stride; no special handling is done for
+**              non-positive strides.
+**
+** Returns the 0-based position (counted in elements, not in array
+** slots) of the first element with the largest absolute value,
+** -1 when n < 1, and 0 when n == 1.
+*/
+static int idamax(int n,REAL *dx,int incx)
+
+    {
+    REAL dmax;
+    int i, ix, itemp;
+
+    if (n < 1)
+        return(-1);
+    if (n ==1 )
+        return(0);
+
+    /* element 0 is the initial candidate in both branches, so the */
+    /* result is defined even when no later element exceeds it.    */
+
+    itemp = 0;
+    dmax = fabs((double)dx[0]);
+
+    if(incx != 1)
+        {
+
+        /* code for increment not equal to 1 */
+        /* 0-based indexing: element i lives at dx[i*incx] */
+
+        ix = incx;
+        for (i = 1; i < n; i++)
+            {
+            if(fabs((double)dx[ix]) > dmax)
+                {
+                itemp = i;
+                dmax = fabs((double)dx[ix]);
+                }
+            ix = ix + incx;
+            }
+        }
+    else
+        {
+
+        /* code for increment equal to 1 */
+
+        for (i = 1; i < n; i++)
+            if(fabs((double)dx[i]) > dmax)
+                {
+                itemp = i;
+                dmax = fabs((double)dx[i]);
+                }
+        }
+    return (itemp);
+    }
+
+
+/*
+** Returns the value of clock() divided by CLOCKS_PER_SEC, both
+** converted to REAL, i.e. the clock() reading expressed in seconds.
+*/
+static REAL second(void)
+
+    {
+    REAL ticks;
+    REAL ticks_per_second;
+
+    ticks = (REAL)clock();
+    ticks_per_second = (REAL)CLOCKS_PER_SEC;
+    return ((REAL)(ticks/ticks_per_second));
+    }
+
+
Author	SHA1	Message	Date
Patrick	74d5d82f57	· Some checks failed Hello World (Multiplatform) / build (macos-12) (push) Has been cancelled Details Hello World (Multiplatform) / build (macos-14) (push) Has been cancelled Details Hello World (Multiplatform) / build (ubuntu-22.04) (push) Has been cancelled Details Hello World (Multiplatform) / build (windows-2022) (push) Has been cancelled Details Run tests with twister / twister-build-prep (push) Has been cancelled Details Run tests with twister / twister-build (push) Has been cancelled Details Run tests with twister / Publish Unit Tests Results (push) Has been cancelled Details Run tests with twister / Check Twister Status (push) Has been cancelled Details	2024-09-15 19:24:26 +02:00
Patrick	9fb09fae0d	first benchmarks. Don't really make sense	2024-09-03 12:59:26 +02:00