Compare commits

...

2 commits

Author SHA1 Message Date
Patrick 74d5d82f57
·
Some checks failed
Hello World (Multiplatform) / build (macos-12) (push) Has been cancelled
Hello World (Multiplatform) / build (macos-14) (push) Has been cancelled
Hello World (Multiplatform) / build (ubuntu-22.04) (push) Has been cancelled
Hello World (Multiplatform) / build (windows-2022) (push) Has been cancelled
Run tests with twister / twister-build-prep (push) Has been cancelled
Run tests with twister / twister-build (push) Has been cancelled
Run tests with twister / Publish Unit Tests Results (push) Has been cancelled
Run tests with twister / Check Twister Status (push) Has been cancelled
2024-09-15 19:24:26 +02:00
Patrick 9fb09fae0d
first benchmarks. Don't really make sense 2024-09-03 12:59:26 +02:00
13 changed files with 1534 additions and 0 deletions

17
benchies.md Normal file
View file

@ -0,0 +1,17 @@
app_kernel: runs in 0 ns
cmsis_dsp/basicmath: runs in 0 ns
data_structure_perf/dlist_perf: ""
data_structure_perf/rbtree_perf: ""
footprints: runs at least 7 minutes without output
latency_measures: 0ns
mbedtls: at least 3mins without output
sched: 0 ns and doesn't exit
sched_userspace: doesn't compile
sys_kernel: 0 ns and no exit
linpack:
LTO && CFI -> ~ 4000000 KFLOPS
LTO && !CFI -> ~ 3650000 KFLOPS
!LTO && !CFI -> ~ 3650000 KFLOPS
mibench -> not implemented for Zephyr, and there is no reason to suspect the results would differ from linpack

View file

@ -0,0 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
cmake_minimum_required(VERSION 3.20.0)
find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})
project(blinky)
target_sources(app PRIVATE src/main.c src/bench.c)

View file

@ -0,0 +1,97 @@
.. zephyr:code-sample:: blinky
:name: Blinky
:relevant-api: gpio_interface
Blink an LED forever using the GPIO API.
Overview
********
The Blinky sample blinks an LED forever using the :ref:`GPIO API <gpio_api>`.
The source code shows how to:
#. Get a pin specification from the :ref:`devicetree <dt-guide>` as a
:c:struct:`gpio_dt_spec`
#. Configure the GPIO pin as an output
#. Toggle the pin forever
See :zephyr:code-sample:`pwm-blinky` for a similar sample that uses the PWM API instead.
.. _blinky-sample-requirements:
Requirements
************
Your board must:
#. Have an LED connected via a GPIO pin (these are called "User LEDs" on many of
Zephyr's :ref:`boards`).
#. Have the LED configured using the ``led0`` devicetree alias.
Building and Running
********************
Build and flash Blinky as follows, changing ``reel_board`` for your board:
.. zephyr-app-commands::
:zephyr-app: samples/basic/blinky
:board: reel_board
:goals: build flash
:compact:
After flashing, the LED starts to blink and messages with the current LED state
are printed on the console. If a runtime error occurs, the sample exits without
printing to the console.
Build errors
************
You will see a build error at the source code line defining the ``struct
gpio_dt_spec led`` variable if you try to build Blinky for an unsupported
board.
On GCC-based toolchains, the error looks like this:
.. code-block:: none
error: '__device_dts_ord_DT_N_ALIAS_led_P_gpios_IDX_0_PH_ORD' undeclared here (not in a function)
Adding board support
********************
To add support for your board, add something like this to your devicetree:
.. code-block:: DTS
/ {
aliases {
led0 = &myled0;
};
leds {
compatible = "gpio-leds";
myled0: led_0 {
gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
};
};
};
The above sets your board's ``led0`` alias to use pin 13 on GPIO controller
``gpio0``. The pin flags :c:macro:`GPIO_ACTIVE_LOW` mean the LED is on when
the pin is set to its low state, and off when the pin is in its high state.
Tips:
- See :dtcompatible:`gpio-leds` for more information on defining GPIO-based LEDs
in devicetree.
- If you're not sure what to do, check the devicetrees for supported boards which
use the same SoC as your target. See :ref:`get-devicetree-outputs` for details.
- See :zephyr_file:`include/zephyr/dt-bindings/gpio/gpio.h` for the flags you can use
in devicetree.
- If the LED is built in to your board hardware, the alias should be defined in
your :ref:`BOARD.dts file <devicetree-in-out-files>`. Otherwise, you can
define one in a :ref:`devicetree overlay <set-devicetree-overlays>`.

View file

@ -0,0 +1,9 @@
CONFIG_GPIO=y
#CONFIG_ASAN=y
#CONFIG_CFI=y
CONFIG_LLVM_USE_LLD=y
#CONFIG_LTO=y
#CONFIG_DEBUG=y
#CONFIG_DEBUG_INFO=y
#CONFIG_DEBUG_OPTIMIZATIONS=y

View file

@ -0,0 +1,12 @@
sample:
name: Blinky Sample
tests:
sample.basic.blinky:
tags:
- LED
- gpio
filter: dt_enabled_alias_with_parent_compat("led0", "gpio-leds")
depends_on: gpio
harness: led
integration_platforms:
- frdm_k64f

View file

@ -0,0 +1,244 @@
/*
* Simple MD5 implementation
* by Creationix
* https://gist.github.com/creationix/4710780
* Licensed under MIT
*
* modified by Julian Kunkel for Embench-iot
* Compile with: gcc -o md5 -O3 -lm md5.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "support.h"
#define LOCAL_SCALE_FACTOR 51
/* BEEBS heap is just an array */
/* MSG_SIZE * 2 + ((((MSG_SIZE+8)/64 + 1) * 64) - 8) + 64 */
#define HEAP_SIZE (2000 + 1016 + 64)
#define MSG_SIZE 1000
/* Result obtained with a single run on the native target on x86 with a MSG_SIZE
* of 1000 and a msg initiated incrementally from 0 to 999 as in benchmark_body.
* If MSG_SIZE or the initialization mechanism of the array change the RESULT
* value needs to be updated accordingly. */
#define RESULT 0x33f673b4
static char heap[HEAP_SIZE];
// Rotate the 32-bit value x left by c bits (only valid for 0 < c < 32).
#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))
// Digest state: after md5() returns, these hold the four 32-bit words of the hash.
static uint32_t h0, h1, h2, h3;

/*
 * Compute the MD5 digest of initial_msg[0 .. initial_len) and leave it in the
 * file-scope words h0, h1, h2, h3 (h0 holds the first four digest bytes in
 * little-endian order, and so on).
 *
 * initial_msg  bytes to hash; it is only read.
 * initial_len  number of bytes to hash.
 *
 * A zero-filled scratch buffer large enough for the padded message is
 * allocated with calloc() and released before returning.  If that allocation
 * fails the function returns early with h0..h3 left at the MD5 initial
 * values; the void interface offers no way to report the failure.
 *
 * Message words and the appended length are decoded/encoded byte by byte in
 * little-endian order, so the result does not depend on host endianness.
 */
void md5(uint8_t *initial_msg, size_t initial_len) {
    // Note: all variables are unsigned 32 bit and wrap modulo 2^32 when calculating

    // r specifies the per-round shift amounts
    const uint32_t r[] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
                          5,  9, 14, 20, 5,  9, 14, 20, 5,  9, 14, 20, 5,  9, 14, 20,
                          4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
                          6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};

    // Use binary integer part of the sines of integers (in radians) as constants
    const uint32_t k[] = {
        0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
        0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
        0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
        0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
        0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
        0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
        0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
        0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
        0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
        0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
        0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
        0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
        0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
        0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
        0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
        0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391};

    // Initialize variables:
    h0 = 0x67452301;
    h1 = 0xefcdab89;
    h2 = 0x98badcfe;
    h3 = 0x10325476;

    // Pre-processing: append a single "1" bit, pad with "0" bits until the
    // length in bits is congruent to 448 (mod 512), then append the original
    // length in bits as a 64-bit little-endian integer.
    /* Notice: the input bytes are considered as bits strings,
       where the first bit is the most significant bit of the byte. */
    size_t new_len = ((((initial_len + 8) / 64) + 1) * 64) - 8;

    // calloc also supplies the "0" padding bits (64 spare bytes are allocated
    // past new_len; the length field lives in the first 8 of them).
    uint8_t *msg = calloc(new_len + 64, 1);
    if (msg == NULL) {
        return; // out of memory: h0..h3 keep the initial values
    }
    if (initial_len > 0) {
        memcpy(msg, initial_msg, initial_len);
    }
    msg[initial_len] = 0x80; // write the "1" bit

    // Append the message length in bits, least significant byte first.
    uint64_t bits_len = (uint64_t)initial_len * 8u;
    for (size_t j = 0; j < 8; j++) {
        msg[new_len + j] = (uint8_t)(bits_len >> (8 * j));
    }

    // Process the message in successive 512-bit chunks:
    for (size_t offset = 0; offset < new_len; offset += (512 / 8)) {
        const uint8_t *chunk = msg + offset;

        // break chunk into sixteen little-endian 32-bit words w[j], 0 <= j <= 15
        uint32_t w[16];
        for (size_t j = 0; j < 16; j++) {
            w[j] = (uint32_t)chunk[4 * j]
                 | ((uint32_t)chunk[4 * j + 1] << 8)
                 | ((uint32_t)chunk[4 * j + 2] << 16)
                 | ((uint32_t)chunk[4 * j + 3] << 24);
        }

#ifdef DEBUG
        printf("offset: %zu %zx\n", offset, offset);
        for (size_t j = 0; j < 64; j++) printf("%x ", (unsigned)chunk[j]);
        puts("");
#endif

        // Initialize hash value for this chunk:
        uint32_t a = h0;
        uint32_t b = h1;
        uint32_t c = h2;
        uint32_t d = h3;

        // Main loop:
        uint32_t i;
        for (i = 0; i < 64; i++) {

#ifdef ROUNDS
            uint8_t *p;
            printf("%u: ", (unsigned)i);
            p = (uint8_t *)&a;
            printf("%2.2x%2.2x%2.2x%2.2x ", p[0], p[1], p[2], p[3]);
            p = (uint8_t *)&b;
            printf("%2.2x%2.2x%2.2x%2.2x ", p[0], p[1], p[2], p[3]);
            p = (uint8_t *)&c;
            printf("%2.2x%2.2x%2.2x%2.2x ", p[0], p[1], p[2], p[3]);
            p = (uint8_t *)&d;
            printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3]);
            puts("");
#endif

            // Select the round function f and the message word index g.
            uint32_t f, g;
            if (i < 16) {
                f = (b & c) | ((~b) & d);
                g = i;
            } else if (i < 32) {
                f = (d & b) | ((~d) & c);
                g = (5 * i + 1) % 16;
            } else if (i < 48) {
                f = b ^ c ^ d;
                g = (3 * i + 5) % 16;
            } else {
                f = c ^ (b | (~d));
                g = (7 * i) % 16;
            }

#ifdef ROUNDS
            printf("f=%x g=%u w[g]=%x\n", (unsigned)f, (unsigned)g, (unsigned)w[g]);
#endif
            uint32_t temp = d;
            d = c;
            c = b;
#ifdef DEBUG
            printf("rotateLeft(%x + %x + %x + %x, %u)\n", (unsigned)a, (unsigned)f,
                   (unsigned)k[i], (unsigned)w[g], (unsigned)r[i]);
#endif
            b = b + LEFTROTATE((a + f + k[i] + w[g]), r[i]);
            a = temp;
        }

        // Add this chunk's hash to result so far:
        h0 += a;
        h1 += b;
        h2 += c;
        h3 += d;
    }

    // cleanup
    free(msg);
}
/* One-off benchmark set-up hook; this benchmark has nothing to prepare. */
void initialise_benchmark(void)
{
    /* intentionally empty */
}
static int benchmark_body (int rpt, int len);
/* Warm up by running the benchmark body `heat` times on a MSG_SIZE-byte
   message; the checksum it returns is discarded. */
void warm_caches(int heat)
{
    (void) benchmark_body(heat, MSG_SIZE);
}
/* Benchmark entry point: repeat the body LOCAL_SCALE_FACTOR * CPU_MHZ times
   on a MSG_SIZE-byte message and return the checksum it produces. */
int benchmark(void)
{
    const int repeats = LOCAL_SCALE_FACTOR * CPU_MHZ;

    return benchmark_body(repeats, MSG_SIZE);
}
/*
 * Hash a freshly allocated len-byte message rpt times.
 *
 * Each iteration allocates the message, fills byte i with (uint8_t)i, runs
 * md5() over it and frees it again.
 *
 * rpt  number of repetitions.
 * len  message length in bytes.
 *
 * Returns h0 ^ h1 ^ h2 ^ h3 as left by the last md5() call, or 0 if the
 * message buffer cannot be allocated (0 differs from RESULT, so
 * verify_benchmark() reports the failure).
 */
static int __attribute__ ((noinline))
benchmark_body (int rpt, int len)
{
    for (int j = 0; j < rpt; j++) {
        uint8_t *msg = malloc((size_t)len);
        if (msg == NULL) {
            return 0; /* allocation failed: do not write through NULL */
        }
        for (int i = 0; i < len; i++) {
            msg[i] = (uint8_t)i;
        }
        md5(msg, (size_t)len);
        free(msg);

        // display result
#ifdef DEBUG
        uint8_t *p;
        p=(uint8_t *)&h0;
        printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3]);
        p=(uint8_t *)&h1;
        printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3]);
        p=(uint8_t *)&h2;
        printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3]);
        p=(uint8_t *)&h3;
        printf("%2.2x%2.2x%2.2x%2.2x\n", p[0], p[1], p[2], p[3]);
#endif
    }
    return h0 ^ h1 ^ h2 ^ h3;
}
/* Return 1 when the benchmark checksum r equals the recorded RESULT, else 0.
   Only the XOR of the digest words is compared, so this isn't a proper
   check of the full hash. */
int verify_benchmark(int r)
{
    if (r == RESULT) {
        return 1;
    }
    return 0;
}

View file

@ -0,0 +1,47 @@
/* Common main.c for the benchmarks
Copyright (C) 2014 Embecosm Limited and University of Bristol
Copyright (C) 2018-2019 Embecosm Limited
Contributor: James Pallister <james.pallister@bristol.ac.uk>
Contributor: Jeremy Bennett <jeremy.bennett@embecosm.com>
This file is part of Embench and was formerly part of the Bristol/Embecosm
Embedded Benchmark Suite.
SPDX-License-Identifier: GPL-3.0-or-later */
#include "support.h"
#include <stdio.h>
/* Benchmark driver: initialise, warm the caches, run the benchmark once and
   verify its result.  Progress messages are written with puts().  Returns 0
   when verify_benchmark() accepts the result and 1 when it returns 0. */
int __attribute__((used)) main(int argc __attribute__((unused)),
                               char *argv[] __attribute__((unused)))
{
  volatile int result;
  int correct;

  puts("Initialising benchmark");
  initialise_benchmark();

  puts("Warming up caches");
  warm_caches(WARMUP_HEAT);

  puts("Running benchmark");
  result = benchmark();
  puts("finished benchmark");

  /* bmarks that use arrays will check a global array rather than int result */
  correct = verify_benchmark(result);

  puts("returning");
  return (!correct);
} /* main () */
/*
Local Variables:
mode: C
c-file-style: "gnu"
End:
*/

View file

@ -0,0 +1,69 @@
/* Support header for BEEBS.
Copyright (C) 2014 Embecosm Limited and the University of Bristol
Copyright (C) 2019 Embecosm Limited
Contributor James Pallister <james.pallister@bristol.ac.uk>
Contributor Jeremy Bennett <jeremy.bennett@embecosm.com>
This file is part of Embench and was formerly part of the Bristol/Embecosm
Embedded Benchmark Suite.
SPDX-License-Identifier: GPL-3.0-or-later */
#ifndef SUPPORT_H
#define SUPPORT_H

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

/* Uncomment to enable the DEBUG-guarded diagnostic output in the benchmark
   sources. */
//#define DEBUG

/* Scale factor applied to the benchmark's repetition count. */
#define CPU_MHZ 1

/* Argument the common main() passes to warm_caches(). */
#define WARMUP_HEAT 1000

/* Include board support header if we have one */

/* Standard functions implemented for each board */

void initialise_board (void);
void start_trigger (void);
void stop_trigger (void);

/* Every benchmark implements this for one-off data initialization.  This is
   only used for initialization that is independent of how often benchmark ()
   is called. */

void initialise_benchmark (void);

/* Every benchmark implements this for cache warm up, typically calling
   benchmark several times. The argument controls how much warming up is
   done, with 0 meaning no warming. */

void warm_caches (int temperature);

/* Every benchmark implements this as its entry point. Don't allow it to be
   inlined! */

int benchmark (void) __attribute__ ((noinline));

/* Every benchmark must implement this to validate the result of the
   benchmark.  It must return -1 if no verification is done. */

int verify_benchmark (int result);

#endif /* SUPPORT_H */
/*
Local Variables:
mode: C
c-file-style: "gnu"
End:
*/

View file

@ -0,0 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
cmake_minimum_required(VERSION 3.20.0)
find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})
project(blinky)
target_sources(app PRIVATE src/main.c)

View file

@ -0,0 +1,97 @@
.. zephyr:code-sample:: blinky
:name: Blinky
:relevant-api: gpio_interface
Blink an LED forever using the GPIO API.
Overview
********
The Blinky sample blinks an LED forever using the :ref:`GPIO API <gpio_api>`.
The source code shows how to:
#. Get a pin specification from the :ref:`devicetree <dt-guide>` as a
:c:struct:`gpio_dt_spec`
#. Configure the GPIO pin as an output
#. Toggle the pin forever
See :zephyr:code-sample:`pwm-blinky` for a similar sample that uses the PWM API instead.
.. _blinky-sample-requirements:
Requirements
************
Your board must:
#. Have an LED connected via a GPIO pin (these are called "User LEDs" on many of
Zephyr's :ref:`boards`).
#. Have the LED configured using the ``led0`` devicetree alias.
Building and Running
********************
Build and flash Blinky as follows, changing ``reel_board`` for your board:
.. zephyr-app-commands::
:zephyr-app: samples/basic/blinky
:board: reel_board
:goals: build flash
:compact:
After flashing, the LED starts to blink and messages with the current LED state
are printed on the console. If a runtime error occurs, the sample exits without
printing to the console.
Build errors
************
You will see a build error at the source code line defining the ``struct
gpio_dt_spec led`` variable if you try to build Blinky for an unsupported
board.
On GCC-based toolchains, the error looks like this:
.. code-block:: none
error: '__device_dts_ord_DT_N_ALIAS_led_P_gpios_IDX_0_PH_ORD' undeclared here (not in a function)
Adding board support
********************
To add support for your board, add something like this to your devicetree:
.. code-block:: DTS
/ {
aliases {
led0 = &myled0;
};
leds {
compatible = "gpio-leds";
myled0: led_0 {
gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
};
};
};
The above sets your board's ``led0`` alias to use pin 13 on GPIO controller
``gpio0``. The pin flags :c:macro:`GPIO_ACTIVE_LOW` mean the LED is on when
the pin is set to its low state, and off when the pin is in its high state.
Tips:
- See :dtcompatible:`gpio-leds` for more information on defining GPIO-based LEDs
in devicetree.
- If you're not sure what to do, check the devicetrees for supported boards which
use the same SoC as your target. See :ref:`get-devicetree-outputs` for details.
- See :zephyr_file:`include/zephyr/dt-bindings/gpio/gpio.h` for the flags you can use
in devicetree.
- If the LED is built in to your board hardware, the alias should be defined in
your :ref:`BOARD.dts file <devicetree-in-out-files>`. Otherwise, you can
define one in a :ref:`devicetree overlay <set-devicetree-overlays>`.

View file

@ -0,0 +1,9 @@
CONFIG_GPIO=y
#CONFIG_ASAN=y
#CONFIG_CFI=y
CONFIG_LLVM_USE_LLD=y
#CONFIG_LTO=y
#CONFIG_DEBUG=y
#CONFIG_DEBUG_INFO=y
#CONFIG_DEBUG_OPTIMIZATIONS=y

View file

@ -0,0 +1,12 @@
sample:
name: Blinky Sample
tests:
sample.basic.blinky:
tags:
- LED
- gpio
filter: dt_enabled_alias_with_parent_compat("led0", "gpio-leds")
depends_on: gpio
harness: led
integration_platforms:
- frdm_k64f

907
benchies/linpack/src/main.c Normal file
View file

@ -0,0 +1,907 @@
/*
**
** LINPACK.C Linpack benchmark, calculates FLOPS.
** (FLoating Point Operations Per Second)
**
** Translated to C by Bonnie Toy 5/88
**
** Modified by Will Menninger, 10/93, with these features:
** (modified on 2/25/94 to fix a problem with daxpy for
** unequal increments or equal increments not equal to 1.
** Jack Dongarra)
**
** - Defaults to double precision.
** - Averages ROLLed and UNROLLed performance.
** - User selectable array sizes.
** - Automatically does enough repetitions to take at least 10 CPU seconds.
** - Prints machine precision.
** - ANSI prototyping.
**
** Modified by ict@nfinit.systems, 12/18, with these features:
**
** - Improved double precision defaulting to allow -DSP to work again
** - Can now take the array size as an argument for automation purposes
** - Main function return type changed to integer for automation purposes
** - Re-organized output for cleaner reports
**
** To compile: cc -O -o linpack linpack.c -lm
**
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <float.h>
#ifndef SP
#ifndef DP
#define DP
#endif
#endif
#ifdef SP
#define ZERO 0.0
#define ONE 1.0
#define PREC "Single"
#define BASE10DIG FLT_DIG
typedef float REAL;
#endif
#ifdef DP
#define ZERO 0.0e0
#define ONE 1.0e0
#define PREC "Double"
#define BASE10DIG DBL_DIG
typedef double REAL;
#endif
/* 2022-07-26: Macro defined for memreq variable to resolve warnings
* during malloc check
*/
#define MEM_T long
static REAL linpack (long nreps,int arsize);
static void matgen (REAL *a,int lda,int n,REAL *b,REAL *norma);
static void dgefa (REAL *a,int lda,int n,int *ipvt,int *info,int roll);
static void dgesl (REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll);
static void daxpy_r (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
static REAL ddot_r (int n,REAL *dx,int incx,REAL *dy,int incy);
static void dscal_r (int n,REAL da,REAL *dx,int incx);
static void daxpy_ur (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
static REAL ddot_ur (int n,REAL *dx,int incx,REAL *dy,int incy);
static void dscal_ur (int n,REAL da,REAL *dx,int incx);
static int idamax (int n,REAL *dx,int incx);
static REAL second (void);
static void *mempool;
/*
** Benchmark driver.
**
** With no command-line argument the array size is read interactively from
** stdin (empty line = 100, 'q'/'Q' or end of input = quit) and the prompt is
** repeated after every run.  With an argument, argv[1] is used as the array
** size and exactly one run is made.
**
** The size is rounded down to an even number and must be at least 10.  One
** memory pool holding the matrix, the right-hand side and the pivot vector is
** allocated per run and released afterwards.  linpack() is called with a
** doubling repetition count until it reports at least 10 seconds.
**
** Always returns 0.
*/
int main(int argc, char **argv)
{
    char buf[80] = "";   /* defined contents even when stdin is never read */
    int arsize;
    long arsize2d,nreps;
    size_t malloc_arg;
    MEM_T memreq;

    while (1)
    {
        if (argc < 2)
        {
            printf("Enter array size (q to quit) [100]: ");
            /* End of input or a read error: stop instead of looping on
               stale buffer contents. */
            if (fgets(buf,sizeof buf,stdin) == NULL)
                break;
        }
        if (buf[0]=='q' || buf[0]=='Q')
            break;
        if (buf[0]=='\0' || buf[0]=='\n')
            arsize=100;
        else
            arsize=atoi(buf);
        if (argc > 1)
            arsize=atoi(argv[1]);
        /* round down to an even size */
        arsize/=2;
        arsize*=2;
        if (arsize<10)
        {
            printf("Too small.\n");
            if (argc > 1) break;
            continue;
        }
        arsize2d = (long)arsize*(long)arsize;
        memreq=arsize2d*sizeof(REAL)+(long)arsize*sizeof(REAL)+(long)arsize*sizeof(int);
        malloc_arg=(size_t)memreq;
        /* the first test rejects a request that does not survive the
           conversion to size_t */
        if ((MEM_T)malloc_arg!=memreq || (mempool=malloc(malloc_arg))==NULL)
        {
            printf("Not enough memory available for given array size.\n");
            if (argc > 1) break;
            continue;
        }
        printf("LINPACK benchmark, %s precision.\n",PREC);
        printf("Machine precision: %d digits.\n",BASE10DIG);
        printf("Array size %d X %d.\n",arsize,arsize);
        printf("Memory required: %ldK.\n",(memreq+512L)>>10);
        printf("Average rolled and unrolled performance:\n\n");
        printf(" Reps Time(s) DGEFA DGESL OVERHEAD KFLOPS\n");
        printf("----------------------------------------------------\n");
        nreps=1;
        while (linpack(nreps,arsize)<10.)
            nreps*=2;
        free(mempool);
        printf("\n");
        if (argc > 1) break;
    }
    return 0;
}
static REAL linpack(long nreps,int arsize)
{
REAL *a,*b;
REAL norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
int *ipvt,n,info,lda;
long i,arsize2d;
lda = arsize;
n = arsize/2;
arsize2d = (long)arsize*(long)arsize;
ops=((2.0*n*n*n)/3.0+2.0*n*n);
a=(REAL *)mempool;
b=a+arsize2d;
ipvt=(int *)&b[arsize];
tdgesl=0;
tdgefa=0;
totalt=second();
for (i=0;i<nreps;i++)
{
matgen(a,lda,n,b,&norma);
t1 = second();
dgefa(a,lda,n,ipvt,&info,1);
tdgefa += second()-t1;
t1 = second();
dgesl(a,lda,n,ipvt,b,0,1);
tdgesl += second()-t1;
}
for (i=0;i<nreps;i++)
{
matgen(a,lda,n,b,&norma);
t1 = second();
dgefa(a,lda,n,ipvt,&info,0);
tdgefa += second()-t1;
t1 = second();
dgesl(a,lda,n,ipvt,b,0,0);
tdgesl += second()-t1;
}
totalt=second()-totalt;
if (totalt<0.5 || tdgefa+tdgesl<0.2)
return(0.);
kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
toverhead=totalt-tdgefa-tdgesl;
if (tdgefa<0.)
tdgefa=0.;
if (tdgesl<0.)
tdgesl=0.;
if (toverhead<0.)
toverhead=0.;
printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n",
nreps,totalt,100.*tdgefa/totalt,
100.*tdgesl/totalt,100.*toverhead/totalt,
kflops);
return(totalt);
}
/*
** For matgen,
** We would like to declare a[][lda], but c does not allow it. In this
** function, references to a[i][j] are written a[lda*i+j].
*/
/*
** Fill the leading n x n part of a (leading dimension lda) with
** pseudo-random values from a fixed-seed generator, store the largest
** generated value (or 0.0 if none is positive) in *norma, and set each b[i]
** to the sum over j of a[lda*j+i].
*/
static void matgen(REAL *a,int lda,int n,REAL *b,REAL *norma)
{
    int seed = 1325;
    int major,minor;

    *norma = 0.0;
    for (major = 0; major < n; major++)
    {
        REAL *slice = &a[lda*major];
        for (minor = 0; minor < n; minor++)
        {
            seed = (int)((long)3125*(long)seed % 65536L);
            slice[minor] = (seed - 32768.0)/16384.0;
            if (slice[minor] > *norma)
                *norma = slice[minor];
        }
    }

    for (minor = 0; minor < n; minor++)
        b[minor] = 0.0;

    for (major = 0; major < n; major++)
    {
        REAL *slice = &a[lda*major];
        for (minor = 0; minor < n; minor++)
            b[minor] += slice[minor];
    }
}
/*
**
** DGEFA benchmark
**
** We would like to declare a[][lda], but c does not allow it. In this
** function, references to a[i][j] are written a[lda*i+j].
**
** dgefa factors a double precision matrix by gaussian elimination.
**
** dgefa is usually called by dgeco, but it can be called
** directly with a saving in time if rcond is not needed.
** (time for dgeco) = (1 + 9/n)*(time for dgefa) .
**
** on entry
**
** a REAL precision[n][lda]
** the matrix to be factored.
**
** lda integer
** the leading dimension of the array a .
**
** n integer
** the order of the matrix a .
**
** on return
**
** a an upper triangular matrix and the multipliers
** which were used to obtain it.
** the factorization can be written a = l*u where
** l is a product of permutation and unit lower
** triangular matrices and u is upper triangular.
**
** ipvt integer[n]
** an integer vector of pivot indices.
**
** info integer
** = 0 normal value.
** = k if u[k][k] .eq. 0.0 . this is not an error
** condition for this subroutine, but it does
** indicate that dgesl or dgedi will divide by zero
** if called. use rcond in dgeco for a reliable
** indication of singularity.
**
** linpack. this version dated 08/14/78 .
** cleve moler, university of New Mexico, argonne national lab.
**
** functions
**
** blas daxpy,dscal,idamax
**
*/
/*
** Factor the n x n matrix stored in a (leading dimension lda) in place by
** gaussian elimination with partial pivoting.
**
** a     matrix storage, overwritten with the factors and multipliers.
** lda   stride between consecutive slices of a.
** n     order of the matrix.
** ipvt  receives the chosen pivot index for every step (n entries).
** info  set to 0, or to the index of the last zero pivot encountered.
** roll  nonzero: use the rolled BLAS helpers (dscal_r, daxpy_r);
**       zero: use the unrolled ones (dscal_ur, daxpy_ur).
**
** The two branches below are identical apart from which helper variant
** they call.
*/
static void dgefa(REAL *a,int lda,int n,int *ipvt,int *info,int roll)
{
REAL t;
int j,k,kp1,l,nm1;
/* gaussian elimination with partial pivoting */
if (roll)
{
*info = 0;
nm1 = n - 1;
/* note: this guard governs only the for loop that follows */
if (nm1 >= 0)
for (k = 0; k < nm1; k++)
{
kp1 = k + 1;
/* find l = pivot index */
l = idamax(n-k,&a[lda*k+k],1) + k;
ipvt[k] = l;
/* zero pivot implies this column already
triangularized */
if (a[lda*k+l] != ZERO)
{
/* interchange if necessary */
if (l != k)
{
t = a[lda*k+l];
a[lda*k+l] = a[lda*k+k];
a[lda*k+k] = t;
}
/* compute multipliers */
t = -ONE/a[lda*k+k];
dscal_r(n-(k+1),t,&a[lda*k+k+1],1);
/* row elimination with column indexing */
for (j = kp1; j < n; j++)
{
t = a[lda*j+l];
if (l != k)
{
a[lda*j+l] = a[lda*j+k];
a[lda*j+k] = t;
}
daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
}
}
else
(*info) = k;
}
/* the last step needs no elimination; only record it and test its pivot */
ipvt[n-1] = n-1;
if (a[lda*(n-1)+(n-1)] == ZERO)
(*info) = n-1;
}
else
{
/* same algorithm as above, using the unrolled helpers */
*info = 0;
nm1 = n - 1;
/* note: this guard governs only the for loop that follows */
if (nm1 >= 0)
for (k = 0; k < nm1; k++)
{
kp1 = k + 1;
/* find l = pivot index */
l = idamax(n-k,&a[lda*k+k],1) + k;
ipvt[k] = l;
/* zero pivot implies this column already
triangularized */
if (a[lda*k+l] != ZERO)
{
/* interchange if necessary */
if (l != k)
{
t = a[lda*k+l];
a[lda*k+l] = a[lda*k+k];
a[lda*k+k] = t;
}
/* compute multipliers */
t = -ONE/a[lda*k+k];
dscal_ur(n-(k+1),t,&a[lda*k+k+1],1);
/* row elimination with column indexing */
for (j = kp1; j < n; j++)
{
t = a[lda*j+l];
if (l != k)
{
a[lda*j+l] = a[lda*j+k];
a[lda*j+k] = t;
}
daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
}
}
else
(*info) = k;
}
/* the last step needs no elimination; only record it and test its pivot */
ipvt[n-1] = n-1;
if (a[lda*(n-1)+(n-1)] == ZERO)
(*info) = n-1;
}
}
/*
**
** DGESL benchmark
**
** We would like to declare a[][lda], but c does not allow it. In this
** function, references to a[i][j] are written a[lda*i+j].
**
** dgesl solves the double precision system
** a * x = b or trans(a) * x = b
** using the factors computed by dgeco or dgefa.
**
** on entry
**
** a double precision[n][lda]
** the output from dgeco or dgefa.
**
** lda integer
** the leading dimension of the array a .
**
** n integer
** the order of the matrix a .
**
** ipvt integer[n]
** the pivot vector from dgeco or dgefa.
**
** b double precision[n]
** the right hand side vector.
**
** job integer
** = 0 to solve a*x = b ,
** = nonzero to solve trans(a)*x = b where
** trans(a) is the transpose.
**
** on return
**
** b the solution vector x .
**
** error condition
**
** a division by zero will occur if the input factor contains a
** zero on the diagonal. technically this indicates singularity
** but it is often caused by improper arguments or improper
** setting of lda . it will not occur if the subroutines are
** called correctly and if dgeco has set rcond .gt. 0.0
** or dgefa has set info .eq. 0 .
**
** to compute inverse(a) * c where c is a matrix
** with p columns
** dgeco(a,lda,n,ipvt,rcond,z)
** if (!rcond is too small){
** for (j=0,j<p,j++)
** dgesl(a,lda,n,ipvt,c[j][0],0);
** }
**
** linpack. this version dated 08/14/78 .
** cleve moler, university of new mexico, argonne national lab.
**
** functions
**
** blas daxpy,ddot
*/
/*
** Solve a*x = b (job == 0) or trans(a)*x = b (job != 0) using the factors
** and pivot vector produced by dgefa; b is overwritten with the solution.
**
** a     factored matrix from dgefa (leading dimension lda).
** lda   stride between consecutive slices of a.
** n     order of the matrix.
** ipvt  pivot indices from dgefa.
** b     right-hand side on entry, solution on return.
** job   0 for a*x = b, nonzero for trans(a)*x = b.
** roll  nonzero: use the rolled BLAS helpers (daxpy_r, ddot_r);
**       zero: use the unrolled ones (daxpy_ur, ddot_ur).
**
** The transposed back-substitution visits k = n-2 down to 0, i.e. kb runs
** from 1 to nm1 inclusive as in the reference LINPACK routine.
*/
static void dgesl(REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll)
{
    REAL t;
    int k,kb,l,nm1;

    if (roll)
    {
        nm1 = n - 1;
        if (job == 0)
        {
            /* job = 0 , solve a * x = b */
            /* first solve l*y = b */
            if (nm1 >= 1)
                for (k = 0; k < nm1; k++)
                {
                    l = ipvt[k];
                    t = b[l];
                    if (l != k)
                    {
                        b[l] = b[k];
                        b[k] = t;
                    }
                    daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
                }
            /* now solve u*x = y */
            for (kb = 0; kb < n; kb++)
            {
                k = n - (kb + 1);
                b[k] = b[k]/a[lda*k+k];
                t = -b[k];
                daxpy_r(k,t,&a[lda*k+0],1,&b[0],1);
            }
        }
        else
        {
            /* job = nonzero, solve trans(a) * x = b */
            /* first solve trans(u)*y = b */
            for (k = 0; k < n; k++)
            {
                t = ddot_r(k,&a[lda*k+0],1,&b[0],1);
                b[k] = (b[k] - t)/a[lda*k+k];
            }
            /* now solve trans(l)*x = y */
            if (nm1 >= 1)
                for (kb = 1; kb <= nm1; kb++)   /* inclusive: reaches k = 0 */
                {
                    k = n - (kb+1);
                    b[k] = b[k] + ddot_r(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
                    l = ipvt[k];
                    if (l != k)
                    {
                        t = b[l];
                        b[l] = b[k];
                        b[k] = t;
                    }
                }
        }
    }
    else
    {
        nm1 = n - 1;
        if (job == 0)
        {
            /* job = 0 , solve a * x = b */
            /* first solve l*y = b */
            if (nm1 >= 1)
                for (k = 0; k < nm1; k++)
                {
                    l = ipvt[k];
                    t = b[l];
                    if (l != k)
                    {
                        b[l] = b[k];
                        b[k] = t;
                    }
                    daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
                }
            /* now solve u*x = y */
            for (kb = 0; kb < n; kb++)
            {
                k = n - (kb + 1);
                b[k] = b[k]/a[lda*k+k];
                t = -b[k];
                daxpy_ur(k,t,&a[lda*k+0],1,&b[0],1);
            }
        }
        else
        {
            /* job = nonzero, solve trans(a) * x = b */
            /* first solve trans(u)*y = b */
            for (k = 0; k < n; k++)
            {
                t = ddot_ur(k,&a[lda*k+0],1,&b[0],1);
                b[k] = (b[k] - t)/a[lda*k+k];
            }
            /* now solve trans(l)*x = y */
            if (nm1 >= 1)
                for (kb = 1; kb <= nm1; kb++)   /* inclusive: reaches k = 0 */
                {
                    k = n - (kb+1);
                    b[k] = b[k] + ddot_ur(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
                    l = ipvt[k];
                    if (l != k)
                    {
                        t = b[l];
                        b[l] = b[k];
                        b[k] = t;
                    }
                }
        }
    }
}
/*
** Constant times a vector plus a vector.
** Jack Dongarra, linpack, 3/11/78.
** ROLLED version
*/
/*
** dy := dy + da*dx over n elements (rolled loop).
**
** n     number of elements; nothing is done when n <= 0.
** da    scalar multiplier; nothing is done when it equals ZERO.
** dx    source vector, read with stride incx.
** dy    destination vector, updated with stride incy.
**
** Indices are 0-based: a non-negative stride starts at element 0, a
** negative stride starts at element (-n+1)*inc and walks back towards 0.
*/
static void daxpy_r(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)
{
    int i,ix,iy;

    if (n <= 0)
        return;
    if (da == ZERO)
        return;
    if (incx != 1 || incy != 1)
    {
        /* code for unequal increments or equal increments != 1 */
        ix = 0;
        iy = 0;
        if (incx < 0) ix = (-n+1)*incx;
        if (incy < 0) iy = (-n+1)*incy;
        for (i = 0;i < n; i++)
        {
            dy[iy] = dy[iy] + da*dx[ix];
            ix = ix + incx;
            iy = iy + incy;
        }
        return;
    }
    /* code for both increments equal to 1 */
    for (i = 0;i < n; i++)
        dy[i] = dy[i] + da*dx[i];
}
/*
** Forms the dot product of two vectors.
** Jack Dongarra, linpack, 3/11/78.
** ROLLED version
*/
/*
** Return the dot product of n elements of dx (stride incx) and dy (stride
** incy), accumulated in index order with a rolled loop.  Returns ZERO when
** n <= 0.
*/
static REAL ddot_r(int n,REAL *dx,int incx,REAL *dy,int incy)
{
    REAL sum = ZERO;
    int count;

    if (n <= 0)
        return(ZERO);

    if (incx == 1 && incy == 1)
    {
        /* both increments equal to 1 */
        for (count = 0; count < n; count++)
            sum += dx[count]*dy[count];
        return(sum);
    }

    /* unequal increments or equal increments != 1 */
    {
        int posx = (incx < 0) ? (-n+1)*incx : 0;
        int posy = (incy < 0) ? (-n+1)*incy : 0;

        for (count = 0; count < n; count++)
        {
            sum += dx[posx]*dy[posy];
            posx += incx;
            posy += incy;
        }
    }
    return(sum);
}
/*
** Scales a vector by a constant.
** Jack Dongarra, linpack, 3/11/78.
** ROLLED version
*/
/*
** Multiply n elements of dx by da in place (rolled loop).  Nothing is done
** when n <= 0.  With incx != 1 the elements at offsets 0, incx, 2*incx, ...
** below n*incx are scaled.
*/
static void dscal_r(int n,REAL da,REAL *dx,int incx)
{
    int pos;

    if (n <= 0)
        return;

    if (incx == 1)
    {
        /* increment equal to 1 */
        for (pos = 0; pos < n; pos++)
            dx[pos] *= da;
    }
    else
    {
        /* increment not equal to 1 */
        const int limit = n*incx;
        for (pos = 0; pos < limit; pos += incx)
            dx[pos] *= da;
    }
}
/*
** constant times a vector plus a vector.
** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version
*/
/*
** dy := dy + da*dx over n elements (unit-stride loop unrolled by 4).
**
** n     number of elements; nothing is done when n <= 0.
** da    scalar multiplier; nothing is done when it equals ZERO.
** dx    source vector, read with stride incx.
** dy    destination vector, updated with stride incy.
**
** Indices are 0-based: a non-negative stride starts at element 0, a
** negative stride starts at element (-n+1)*inc and walks back towards 0.
*/
static void daxpy_ur(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)
{
    int i,ix,iy,m;

    if (n <= 0)
        return;
    if (da == ZERO)
        return;
    if (incx != 1 || incy != 1)
    {
        /* code for unequal increments or equal increments != 1 */
        ix = 0;
        iy = 0;
        if (incx < 0) ix = (-n+1)*incx;
        if (incy < 0) iy = (-n+1)*incy;
        for (i = 0;i < n; i++)
        {
            dy[iy] = dy[iy] + da*dx[ix];
            ix = ix + incx;
            iy = iy + incy;
        }
        return;
    }
    /* code for both increments equal to 1 */
    /* handle the n % 4 leading elements one at a time ... */
    m = n % 4;
    if ( m != 0)
    {
        for (i = 0; i < m; i++)
            dy[i] = dy[i] + da*dx[i];
        if (n < 4)
            return;
    }
    /* ... then the rest four per iteration */
    for (i = m; i < n; i = i + 4)
    {
        dy[i] = dy[i] + da*dx[i];
        dy[i+1] = dy[i+1] + da*dx[i+1];
        dy[i+2] = dy[i+2] + da*dx[i+2];
        dy[i+3] = dy[i+3] + da*dx[i+3];
    }
}
/*
** Forms the dot product of two vectors.
** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version
*/
/*
** Return the dot product of n elements of dx (stride incx) and dy (stride
** incy).  The unit-stride case handles the first n % 5 elements singly and
** the remainder five per iteration.  Returns ZERO when n <= 0.
*/
static REAL ddot_ur(int n,REAL *dx,int incx,REAL *dy,int incy)
{
    REAL sum = ZERO;
    int pos,head;

    if (n <= 0)
        return(ZERO);

    if (incx != 1 || incy != 1)
    {
        /* unequal increments or equal increments != 1 */
        int posx = (incx < 0) ? (-n+1)*incx : 0;
        int posy = (incy < 0) ? (-n+1)*incy : 0;

        for (pos = 0; pos < n; pos++)
        {
            sum = sum + dx[posx]*dy[posy];
            posx += incx;
            posy += incy;
        }
        return(sum);
    }

    /* both increments equal to 1: leading remainder first */
    head = n % 5;
    for (pos = 0; pos < head; pos++)
        sum = sum + dx[pos]*dy[pos];

    /* then blocks of five (no iterations when n < 5) */
    for (pos = head; pos < n; pos += 5)
    {
        sum = sum + dx[pos]*dy[pos] +
            dx[pos+1]*dy[pos+1] + dx[pos+2]*dy[pos+2] +
            dx[pos+3]*dy[pos+3] + dx[pos+4]*dy[pos+4];
    }
    return(sum);
}
/*
** Scales a vector by a constant.
** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version
*/
/*
** Multiply n elements of dx by da in place.  The unit-stride case handles
** the first n % 5 elements singly and the remainder five per iteration.
** Nothing is done when n <= 0.  With incx != 1 the elements at offsets
** 0, incx, 2*incx, ... below n*incx are scaled.
*/
static void dscal_ur(int n,REAL da,REAL *dx,int incx)
{
    int pos,head;

    if (n <= 0)
        return;

    if (incx != 1)
    {
        /* increment not equal to 1 */
        const int limit = n*incx;
        for (pos = 0; pos < limit; pos += incx)
            dx[pos] *= da;
        return;
    }

    /* increment equal to 1: leading remainder first */
    head = n % 5;
    for (pos = 0; pos < head; pos++)
        dx[pos] *= da;

    /* then blocks of five (no iterations when n < 5) */
    for (pos = head; pos < n; pos += 5)
    {
        dx[pos] *= da;
        dx[pos+1] *= da;
        dx[pos+2] *= da;
        dx[pos+3] *= da;
        dx[pos+4] *= da;
    }
}
/*
** Finds the index of element having max. absolute value.
** Jack Dongarra, linpack, 3/11/78.
*/
/*
** Return the 0-based position (counted in elements visited, not in array
** offsets) of the first element of largest absolute value among the n
** elements dx[0], dx[incx], dx[2*incx], ...
**
** Returns -1 when n < 1, and 0 when n == 1.  For n > 1 a non-positive incx
** also yields -1 instead of indexing outside the vector.
*/
static int idamax(int n,REAL *dx,int incx)
{
    REAL dmax;
    int i, ix, itemp;

    if (n < 1)
        return(-1);
    if (n == 1)
        return(0);
    if (incx <= 0)
        return(-1);

    /* element 0 is the initial candidate for either stride */
    itemp = 0;
    dmax = fabs((double)dx[0]);

    if (incx != 1)
    {
        /* code for increment not equal to 1 */
        ix = incx;
        for (i = 1; i < n; i++)
        {
            if (fabs((double)dx[ix]) > dmax)
            {
                itemp = i;
                dmax = fabs((double)dx[ix]);
            }
            ix = ix + incx;
        }
    }
    else
    {
        /* code for increment equal to 1 */
        for (i = 1; i < n; i++)
            if (fabs((double)dx[i]) > dmax)
            {
                itemp = i;
                dmax = fabs((double)dx[i]);
            }
    }
    return (itemp);
}
static REAL second(void)
{
return ((REAL)((REAL)clock()/(REAL)CLOCKS_PER_SEC));
}