first benchmarks. Don't really make sense
This commit is contained in:
parent
571558a4fe
commit
9fb09fae0d
15
benchies.md
Normal file
15
benchies.md
Normal file
|
@ -0,0 +1,15 @@
|
|||
app_kernel: runs in 0 ns
|
||||
cmsis_dsp/basicmath: runs in 0 ns
|
||||
data_structure_perf/dlist_perf: ""
|
||||
data_structure_perf/rbtree_perf: ""
|
||||
footprints: runs at least 7 minutes without output
|
||||
latency_measures: 0ns
|
||||
mbedtls: at least 3mins without output
|
||||
sched: 0ns an doesn't exit
|
||||
sched_userspace: doesn't compile
|
||||
sys_kernel: 0 ns and no exit
|
||||
|
||||
linpack:
|
||||
LTO && CFI -> ~ 4000000 KFLOPS
|
||||
LTO && !CFI -> ~ 3650000 KFLOPS
|
||||
!LTO && !CFI -> ~ 3650000 KFLOPS
|
7
benchies/linpack/CMakeLists.txt
Normal file
7
benchies/linpack/CMakeLists.txt
Normal file
|
@ -0,0 +1,7 @@
|
|||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
cmake_minimum_required(VERSION 3.20.0)
|
||||
find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})
|
||||
project(blinky)
|
||||
|
||||
target_sources(app PRIVATE src/main.c)
|
97
benchies/linpack/README.rst
Normal file
97
benchies/linpack/README.rst
Normal file
|
@ -0,0 +1,97 @@
|
|||
.. zephyr:code-sample:: blinky
|
||||
:name: Blinky
|
||||
:relevant-api: gpio_interface
|
||||
|
||||
Blink an LED forever using the GPIO API.
|
||||
|
||||
Overview
|
||||
********
|
||||
|
||||
The Blinky sample blinks an LED forever using the :ref:`GPIO API <gpio_api>`.
|
||||
|
||||
The source code shows how to:
|
||||
|
||||
#. Get a pin specification from the :ref:`devicetree <dt-guide>` as a
|
||||
:c:struct:`gpio_dt_spec`
|
||||
#. Configure the GPIO pin as an output
|
||||
#. Toggle the pin forever
|
||||
|
||||
See :zephyr:code-sample:`pwm-blinky` for a similar sample that uses the PWM API instead.
|
||||
|
||||
.. _blinky-sample-requirements:
|
||||
|
||||
Requirements
|
||||
************
|
||||
|
||||
Your board must:
|
||||
|
||||
#. Have an LED connected via a GPIO pin (these are called "User LEDs" on many of
|
||||
Zephyr's :ref:`boards`).
|
||||
#. Have the LED configured using the ``led0`` devicetree alias.
|
||||
|
||||
Building and Running
|
||||
********************
|
||||
|
||||
Build and flash Blinky as follows, changing ``reel_board`` for your board:
|
||||
|
||||
.. zephyr-app-commands::
|
||||
:zephyr-app: samples/basic/blinky
|
||||
:board: reel_board
|
||||
:goals: build flash
|
||||
:compact:
|
||||
|
||||
After flashing, the LED starts to blink and messages with the current LED state
|
||||
are printed on the console. If a runtime error occurs, the sample exits without
|
||||
printing to the console.
|
||||
|
||||
Build errors
|
||||
************
|
||||
|
||||
You will see a build error at the source code line defining the ``struct
|
||||
gpio_dt_spec led`` variable if you try to build Blinky for an unsupported
|
||||
board.
|
||||
|
||||
On GCC-based toolchains, the error looks like this:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
error: '__device_dts_ord_DT_N_ALIAS_led_P_gpios_IDX_0_PH_ORD' undeclared here (not in a function)
|
||||
|
||||
Adding board support
|
||||
********************
|
||||
|
||||
To add support for your board, add something like this to your devicetree:
|
||||
|
||||
.. code-block:: DTS
|
||||
|
||||
/ {
|
||||
aliases {
|
||||
led0 = &myled0;
|
||||
};
|
||||
|
||||
leds {
|
||||
compatible = "gpio-leds";
|
||||
myled0: led_0 {
|
||||
gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
The above sets your board's ``led0`` alias to use pin 13 on GPIO controller
|
||||
``gpio0``. The pin flags :c:macro:`GPIO_ACTIVE_HIGH` mean the LED is on when
|
||||
the pin is set to its high state, and off when the pin is in its low state.
|
||||
|
||||
Tips:
|
||||
|
||||
- See :dtcompatible:`gpio-leds` for more information on defining GPIO-based LEDs
|
||||
in devicetree.
|
||||
|
||||
- If you're not sure what to do, check the devicetrees for supported boards which
|
||||
use the same SoC as your target. See :ref:`get-devicetree-outputs` for details.
|
||||
|
||||
- See :zephyr_file:`include/zephyr/dt-bindings/gpio/gpio.h` for the flags you can use
|
||||
in devicetree.
|
||||
|
||||
- If the LED is built in to your board hardware, the alias should be defined in
|
||||
your :ref:`BOARD.dts file <devicetree-in-out-files>`. Otherwise, you can
|
||||
define one in a :ref:`devicetree overlay <set-devicetree-overlays>`.
|
9
benchies/linpack/prj.conf
Normal file
9
benchies/linpack/prj.conf
Normal file
|
@ -0,0 +1,9 @@
|
|||
CONFIG_GPIO=y
|
||||
#CONFIG_ASAN=y
|
||||
#CONFIG_CFI=y
|
||||
CONFIG_LLVM_USE_LLD=y
|
||||
#CONFIG_LTO=y
|
||||
|
||||
#CONFIG_DEBUG=y
|
||||
#CONFIG_DEBUG_INFO=y
|
||||
#CONFIG_DEBUG_OPTIMIZATIONS=y
|
12
benchies/linpack/sample.yaml
Normal file
12
benchies/linpack/sample.yaml
Normal file
|
@ -0,0 +1,12 @@
|
|||
sample:
|
||||
name: Blinky Sample
|
||||
tests:
|
||||
sample.basic.blinky:
|
||||
tags:
|
||||
- LED
|
||||
- gpio
|
||||
filter: dt_enabled_alias_with_parent_compat("led0", "gpio-leds")
|
||||
depends_on: gpio
|
||||
harness: led
|
||||
integration_platforms:
|
||||
- frdm_k64f
|
907
benchies/linpack/src/main.c
Normal file
907
benchies/linpack/src/main.c
Normal file
|
@ -0,0 +1,907 @@
|
|||
/*
|
||||
**
|
||||
** LINPACK.C Linpack benchmark, calculates FLOPS.
|
||||
** (FLoating Point Operations Per Second)
|
||||
**
|
||||
** Translated to C by Bonnie Toy 5/88
|
||||
**
|
||||
** Modified by Will Menninger, 10/93, with these features:
|
||||
** (modified on 2/25/94 to fix a problem with daxpy for
|
||||
** unequal increments or equal increments not equal to 1.
|
||||
** Jack Dongarra)
|
||||
**
|
||||
** - Defaults to double precision.
|
||||
** - Averages ROLLed and UNROLLed performance.
|
||||
** - User selectable array sizes.
|
||||
** - Automatically does enough repetitions to take at least 10 CPU seconds.
|
||||
** - Prints machine precision.
|
||||
** - ANSI prototyping.
|
||||
**
|
||||
** Modified by ict@nfinit.systems, 12/18, with these features:
|
||||
**
|
||||
** - Improved double precision defaulting to allow -DSP to work again
|
||||
** - Can now take the array size as an argument for automation purposes
|
||||
** - Main function return type changed to integer for automation purposes
|
||||
** - Re-organized output for cleaner reports
|
||||
**
|
||||
** To compile: cc -O -o linpack linpack.c -lm
|
||||
**
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
#include <float.h>
|
||||
|
||||
#ifndef SP
|
||||
#ifndef DP
|
||||
#define DP
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef SP
|
||||
#define ZERO 0.0
|
||||
#define ONE 1.0
|
||||
#define PREC "Single"
|
||||
#define BASE10DIG FLT_DIG
|
||||
|
||||
typedef float REAL;
|
||||
#endif
|
||||
|
||||
#ifdef DP
|
||||
#define ZERO 0.0e0
|
||||
#define ONE 1.0e0
|
||||
#define PREC "Double"
|
||||
#define BASE10DIG DBL_DIG
|
||||
|
||||
typedef double REAL;
|
||||
#endif
|
||||
|
||||
/* 2022-07-26: Macro defined for memreq variable to resolve warnings
|
||||
* during malloc check
|
||||
*/
|
||||
#define MEM_T long
|
||||
|
||||
static REAL linpack (long nreps,int arsize);
|
||||
static void matgen (REAL *a,int lda,int n,REAL *b,REAL *norma);
|
||||
static void dgefa (REAL *a,int lda,int n,int *ipvt,int *info,int roll);
|
||||
static void dgesl (REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll);
|
||||
static void daxpy_r (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
|
||||
static REAL ddot_r (int n,REAL *dx,int incx,REAL *dy,int incy);
|
||||
static void dscal_r (int n,REAL da,REAL *dx,int incx);
|
||||
static void daxpy_ur (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
|
||||
static REAL ddot_ur (int n,REAL *dx,int incx,REAL *dy,int incy);
|
||||
static void dscal_ur (int n,REAL da,REAL *dx,int incx);
|
||||
static int idamax (int n,REAL *dx,int incx);
|
||||
static REAL second (void);
|
||||
|
||||
static void *mempool;
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
|
||||
{
|
||||
char buf[80];
|
||||
int arsize;
|
||||
long arsize2d,nreps;
|
||||
size_t malloc_arg;
|
||||
MEM_T memreq;
|
||||
|
||||
while (1)
|
||||
{
|
||||
if (argc < 2)
|
||||
{
|
||||
printf("Enter array size (q to quit) [100]: ");
|
||||
fgets(buf,79,stdin);
|
||||
}
|
||||
if (buf[0]=='q' || buf[0]=='Q')
|
||||
break;
|
||||
if (buf[0]=='\0' || buf[0]=='\n')
|
||||
arsize=100;
|
||||
else
|
||||
arsize=atoi(buf);
|
||||
if (argc > 1)
|
||||
arsize=atoi(argv[1]);
|
||||
arsize/=2;
|
||||
arsize*=2;
|
||||
if (arsize<10)
|
||||
{
|
||||
printf("Too small.\n");
|
||||
if (argc > 1) break;
|
||||
continue;
|
||||
}
|
||||
arsize2d = (long)arsize*(long)arsize;
|
||||
memreq=arsize2d*sizeof(REAL)+(long)arsize*sizeof(REAL)+(long)arsize*sizeof(int);
|
||||
malloc_arg=(size_t)memreq;
|
||||
if ((MEM_T)malloc_arg!=memreq || (mempool=malloc(malloc_arg))==NULL)
|
||||
{
|
||||
printf("Not enough memory available for given array size.\n");
|
||||
if (argc > 1) break;
|
||||
continue;
|
||||
}
|
||||
printf("LINPACK benchmark, %s precision.\n",PREC);
|
||||
printf("Machine precision: %d digits.\n",BASE10DIG);
|
||||
printf("Array size %d X %d.\n",arsize,arsize);
|
||||
printf("Memory required: %ldK.\n",(memreq+512L)>>10);
|
||||
printf("Average rolled and unrolled performance:\n\n");
|
||||
printf(" Reps Time(s) DGEFA DGESL OVERHEAD KFLOPS\n");
|
||||
printf("----------------------------------------------------\n");
|
||||
nreps=1;
|
||||
while (linpack(nreps,arsize)<10.)
|
||||
nreps*=2;
|
||||
free(mempool);
|
||||
printf("\n");
|
||||
if (argc > 1) break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static REAL linpack(long nreps,int arsize)
|
||||
|
||||
{
|
||||
REAL *a,*b;
|
||||
REAL norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
|
||||
int *ipvt,n,info,lda;
|
||||
long i,arsize2d;
|
||||
|
||||
lda = arsize;
|
||||
n = arsize/2;
|
||||
arsize2d = (long)arsize*(long)arsize;
|
||||
ops=((2.0*n*n*n)/3.0+2.0*n*n);
|
||||
a=(REAL *)mempool;
|
||||
b=a+arsize2d;
|
||||
ipvt=(int *)&b[arsize];
|
||||
tdgesl=0;
|
||||
tdgefa=0;
|
||||
totalt=second();
|
||||
for (i=0;i<nreps;i++)
|
||||
{
|
||||
matgen(a,lda,n,b,&norma);
|
||||
t1 = second();
|
||||
dgefa(a,lda,n,ipvt,&info,1);
|
||||
tdgefa += second()-t1;
|
||||
t1 = second();
|
||||
dgesl(a,lda,n,ipvt,b,0,1);
|
||||
tdgesl += second()-t1;
|
||||
}
|
||||
for (i=0;i<nreps;i++)
|
||||
{
|
||||
matgen(a,lda,n,b,&norma);
|
||||
t1 = second();
|
||||
dgefa(a,lda,n,ipvt,&info,0);
|
||||
tdgefa += second()-t1;
|
||||
t1 = second();
|
||||
dgesl(a,lda,n,ipvt,b,0,0);
|
||||
tdgesl += second()-t1;
|
||||
}
|
||||
totalt=second()-totalt;
|
||||
if (totalt<0.5 || tdgefa+tdgesl<0.2)
|
||||
return(0.);
|
||||
kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
|
||||
toverhead=totalt-tdgefa-tdgesl;
|
||||
if (tdgefa<0.)
|
||||
tdgefa=0.;
|
||||
if (tdgesl<0.)
|
||||
tdgesl=0.;
|
||||
if (toverhead<0.)
|
||||
toverhead=0.;
|
||||
printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n",
|
||||
nreps,totalt,100.*tdgefa/totalt,
|
||||
100.*tdgesl/totalt,100.*toverhead/totalt,
|
||||
kflops);
|
||||
return(totalt);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** For matgen,
|
||||
** We would like to declare a[][lda], but c does not allow it. In this
|
||||
** function, references to a[i][j] are written a[lda*i+j].
|
||||
*/
|
||||
static void matgen(REAL *a,int lda,int n,REAL *b,REAL *norma)
|
||||
|
||||
{
|
||||
int init,i,j;
|
||||
|
||||
init = 1325;
|
||||
*norma = 0.0;
|
||||
for (j = 0; j < n; j++)
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
init = (int)((long)3125*(long)init % 65536L);
|
||||
a[lda*j+i] = (init - 32768.0)/16384.0;
|
||||
*norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma;
|
||||
}
|
||||
for (i = 0; i < n; i++)
|
||||
b[i] = 0.0;
|
||||
for (j = 0; j < n; j++)
|
||||
for (i = 0; i < n; i++)
|
||||
b[i] = b[i] + a[lda*j+i];
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
**
|
||||
** DGEFA benchmark
|
||||
**
|
||||
** We would like to declare a[][lda], but c does not allow it. In this
|
||||
** function, references to a[i][j] are written a[lda*i+j].
|
||||
**
|
||||
** dgefa factors a double precision matrix by gaussian elimination.
|
||||
**
|
||||
** dgefa is usually called by dgeco, but it can be called
|
||||
** directly with a saving in time if rcond is not needed.
|
||||
** (time for dgeco) = (1 + 9/n)*(time for dgefa) .
|
||||
**
|
||||
** on entry
|
||||
**
|
||||
** a REAL precision[n][lda]
|
||||
** the matrix to be factored.
|
||||
**
|
||||
** lda integer
|
||||
** the leading dimension of the array a .
|
||||
**
|
||||
** n integer
|
||||
** the order of the matrix a .
|
||||
**
|
||||
** on return
|
||||
**
|
||||
** a an upper triangular matrix and the multipliers
|
||||
** which were used to obtain it.
|
||||
** the factorization can be written a = l*u where
|
||||
** l is a product of permutation and unit lower
|
||||
** triangular matrices and u is upper triangular.
|
||||
**
|
||||
** ipvt integer[n]
|
||||
** an integer vector of pivot indices.
|
||||
**
|
||||
** info integer
|
||||
** = 0 normal value.
|
||||
** = k if u[k][k] .eq. 0.0 . this is not an error
|
||||
** condition for this subroutine, but it does
|
||||
** indicate that dgesl or dgedi will divide by zero
|
||||
** if called. use rcond in dgeco for a reliable
|
||||
** indication of singularity.
|
||||
**
|
||||
** linpack. this version dated 08/14/78 .
|
||||
** cleve moler, university of New Mexico, argonne national lab.
|
||||
**
|
||||
** functions
|
||||
**
|
||||
** blas daxpy,dscal,idamax
|
||||
**
|
||||
*/
|
||||
static void dgefa(REAL *a,int lda,int n,int *ipvt,int *info,int roll)
|
||||
|
||||
{
|
||||
REAL t;
|
||||
int j,k,kp1,l,nm1;
|
||||
|
||||
/* gaussian elimination with partial pivoting */
|
||||
|
||||
if (roll)
|
||||
{
|
||||
*info = 0;
|
||||
nm1 = n - 1;
|
||||
if (nm1 >= 0)
|
||||
for (k = 0; k < nm1; k++)
|
||||
{
|
||||
kp1 = k + 1;
|
||||
|
||||
/* find l = pivot index */
|
||||
|
||||
l = idamax(n-k,&a[lda*k+k],1) + k;
|
||||
ipvt[k] = l;
|
||||
|
||||
/* zero pivot implies this column already
|
||||
triangularized */
|
||||
|
||||
if (a[lda*k+l] != ZERO)
|
||||
{
|
||||
|
||||
/* interchange if necessary */
|
||||
|
||||
if (l != k)
|
||||
{
|
||||
t = a[lda*k+l];
|
||||
a[lda*k+l] = a[lda*k+k];
|
||||
a[lda*k+k] = t;
|
||||
}
|
||||
|
||||
/* compute multipliers */
|
||||
|
||||
t = -ONE/a[lda*k+k];
|
||||
dscal_r(n-(k+1),t,&a[lda*k+k+1],1);
|
||||
|
||||
/* row elimination with column indexing */
|
||||
|
||||
for (j = kp1; j < n; j++)
|
||||
{
|
||||
t = a[lda*j+l];
|
||||
if (l != k)
|
||||
{
|
||||
a[lda*j+l] = a[lda*j+k];
|
||||
a[lda*j+k] = t;
|
||||
}
|
||||
daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
|
||||
}
|
||||
}
|
||||
else
|
||||
(*info) = k;
|
||||
}
|
||||
ipvt[n-1] = n-1;
|
||||
if (a[lda*(n-1)+(n-1)] == ZERO)
|
||||
(*info) = n-1;
|
||||
}
|
||||
else
|
||||
{
|
||||
*info = 0;
|
||||
nm1 = n - 1;
|
||||
if (nm1 >= 0)
|
||||
for (k = 0; k < nm1; k++)
|
||||
{
|
||||
kp1 = k + 1;
|
||||
|
||||
/* find l = pivot index */
|
||||
|
||||
l = idamax(n-k,&a[lda*k+k],1) + k;
|
||||
ipvt[k] = l;
|
||||
|
||||
/* zero pivot implies this column already
|
||||
triangularized */
|
||||
|
||||
if (a[lda*k+l] != ZERO)
|
||||
{
|
||||
|
||||
/* interchange if necessary */
|
||||
|
||||
if (l != k)
|
||||
{
|
||||
t = a[lda*k+l];
|
||||
a[lda*k+l] = a[lda*k+k];
|
||||
a[lda*k+k] = t;
|
||||
}
|
||||
|
||||
/* compute multipliers */
|
||||
|
||||
t = -ONE/a[lda*k+k];
|
||||
dscal_ur(n-(k+1),t,&a[lda*k+k+1],1);
|
||||
|
||||
/* row elimination with column indexing */
|
||||
|
||||
for (j = kp1; j < n; j++)
|
||||
{
|
||||
t = a[lda*j+l];
|
||||
if (l != k)
|
||||
{
|
||||
a[lda*j+l] = a[lda*j+k];
|
||||
a[lda*j+k] = t;
|
||||
}
|
||||
daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
|
||||
}
|
||||
}
|
||||
else
|
||||
(*info) = k;
|
||||
}
|
||||
ipvt[n-1] = n-1;
|
||||
if (a[lda*(n-1)+(n-1)] == ZERO)
|
||||
(*info) = n-1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
**
|
||||
** DGESL benchmark
|
||||
**
|
||||
** We would like to declare a[][lda], but c does not allow it. In this
|
||||
** function, references to a[i][j] are written a[lda*i+j].
|
||||
**
|
||||
** dgesl solves the double precision system
|
||||
** a * x = b or trans(a) * x = b
|
||||
** using the factors computed by dgeco or dgefa.
|
||||
**
|
||||
** on entry
|
||||
**
|
||||
** a double precision[n][lda]
|
||||
** the output from dgeco or dgefa.
|
||||
**
|
||||
** lda integer
|
||||
** the leading dimension of the array a .
|
||||
**
|
||||
** n integer
|
||||
** the order of the matrix a .
|
||||
**
|
||||
** ipvt integer[n]
|
||||
** the pivot vector from dgeco or dgefa.
|
||||
**
|
||||
** b double precision[n]
|
||||
** the right hand side vector.
|
||||
**
|
||||
** job integer
|
||||
** = 0 to solve a*x = b ,
|
||||
** = nonzero to solve trans(a)*x = b where
|
||||
** trans(a) is the transpose.
|
||||
**
|
||||
** on return
|
||||
**
|
||||
** b the solution vector x .
|
||||
**
|
||||
** error condition
|
||||
**
|
||||
** a division by zero will occur if the input factor contains a
|
||||
** zero on the diagonal. technically this indicates singularity
|
||||
** but it is often caused by improper arguments or improper
|
||||
** setting of lda . it will not occur if the subroutines are
|
||||
** called correctly and if dgeco has set rcond .gt. 0.0
|
||||
** or dgefa has set info .eq. 0 .
|
||||
**
|
||||
** to compute inverse(a) * c where c is a matrix
|
||||
** with p columns
|
||||
** dgeco(a,lda,n,ipvt,rcond,z)
|
||||
** if (!rcond is too small){
|
||||
** for (j=0,j<p,j++)
|
||||
** dgesl(a,lda,n,ipvt,c[j][0],0);
|
||||
** }
|
||||
**
|
||||
** linpack. this version dated 08/14/78 .
|
||||
** cleve moler, university of new mexico, argonne national lab.
|
||||
**
|
||||
** functions
|
||||
**
|
||||
** blas daxpy,ddot
|
||||
*/
|
||||
static void dgesl(REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll)
|
||||
|
||||
{
|
||||
REAL t;
|
||||
int k,kb,l,nm1;
|
||||
|
||||
if (roll)
|
||||
{
|
||||
nm1 = n - 1;
|
||||
if (job == 0)
|
||||
{
|
||||
|
||||
/* job = 0 , solve a * x = b */
|
||||
/* first solve l*y = b */
|
||||
|
||||
if (nm1 >= 1)
|
||||
for (k = 0; k < nm1; k++)
|
||||
{
|
||||
l = ipvt[k];
|
||||
t = b[l];
|
||||
if (l != k)
|
||||
{
|
||||
b[l] = b[k];
|
||||
b[k] = t;
|
||||
}
|
||||
daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
|
||||
}
|
||||
|
||||
/* now solve u*x = y */
|
||||
|
||||
for (kb = 0; kb < n; kb++)
|
||||
{
|
||||
k = n - (kb + 1);
|
||||
b[k] = b[k]/a[lda*k+k];
|
||||
t = -b[k];
|
||||
daxpy_r(k,t,&a[lda*k+0],1,&b[0],1);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
/* job = nonzero, solve trans(a) * x = b */
|
||||
/* first solve trans(u)*y = b */
|
||||
|
||||
for (k = 0; k < n; k++)
|
||||
{
|
||||
t = ddot_r(k,&a[lda*k+0],1,&b[0],1);
|
||||
b[k] = (b[k] - t)/a[lda*k+k];
|
||||
}
|
||||
|
||||
/* now solve trans(l)*x = y */
|
||||
|
||||
if (nm1 >= 1)
|
||||
for (kb = 1; kb < nm1; kb++)
|
||||
{
|
||||
k = n - (kb+1);
|
||||
b[k] = b[k] + ddot_r(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
|
||||
l = ipvt[k];
|
||||
if (l != k)
|
||||
{
|
||||
t = b[l];
|
||||
b[l] = b[k];
|
||||
b[k] = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nm1 = n - 1;
|
||||
if (job == 0)
|
||||
{
|
||||
|
||||
/* job = 0 , solve a * x = b */
|
||||
/* first solve l*y = b */
|
||||
|
||||
if (nm1 >= 1)
|
||||
for (k = 0; k < nm1; k++)
|
||||
{
|
||||
l = ipvt[k];
|
||||
t = b[l];
|
||||
if (l != k)
|
||||
{
|
||||
b[l] = b[k];
|
||||
b[k] = t;
|
||||
}
|
||||
daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
|
||||
}
|
||||
|
||||
/* now solve u*x = y */
|
||||
|
||||
for (kb = 0; kb < n; kb++)
|
||||
{
|
||||
k = n - (kb + 1);
|
||||
b[k] = b[k]/a[lda*k+k];
|
||||
t = -b[k];
|
||||
daxpy_ur(k,t,&a[lda*k+0],1,&b[0],1);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
/* job = nonzero, solve trans(a) * x = b */
|
||||
/* first solve trans(u)*y = b */
|
||||
|
||||
for (k = 0; k < n; k++)
|
||||
{
|
||||
t = ddot_ur(k,&a[lda*k+0],1,&b[0],1);
|
||||
b[k] = (b[k] - t)/a[lda*k+k];
|
||||
}
|
||||
|
||||
/* now solve trans(l)*x = y */
|
||||
|
||||
if (nm1 >= 1)
|
||||
for (kb = 1; kb < nm1; kb++)
|
||||
{
|
||||
k = n - (kb+1);
|
||||
b[k] = b[k] + ddot_ur(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
|
||||
l = ipvt[k];
|
||||
if (l != k)
|
||||
{
|
||||
t = b[l];
|
||||
b[l] = b[k];
|
||||
b[k] = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
** Constant times a vector plus a vector.
|
||||
** Jack Dongarra, linpack, 3/11/78.
|
||||
** ROLLED version
|
||||
*/
|
||||
static void daxpy_r(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)
|
||||
|
||||
{
|
||||
int i,ix,iy;
|
||||
|
||||
if (n <= 0)
|
||||
return;
|
||||
if (da == ZERO)
|
||||
return;
|
||||
|
||||
if (incx != 1 || incy != 1)
|
||||
{
|
||||
|
||||
/* code for unequal increments or equal increments != 1 */
|
||||
|
||||
ix = 1;
|
||||
iy = 1;
|
||||
if(incx < 0) ix = (-n+1)*incx + 1;
|
||||
if(incy < 0)iy = (-n+1)*incy + 1;
|
||||
for (i = 0;i < n; i++)
|
||||
{
|
||||
dy[iy] = dy[iy] + da*dx[ix];
|
||||
ix = ix + incx;
|
||||
iy = iy + incy;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* code for both increments equal to 1 */
|
||||
|
||||
for (i = 0;i < n; i++)
|
||||
dy[i] = dy[i] + da*dx[i];
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Forms the dot product of two vectors.
|
||||
** Jack Dongarra, linpack, 3/11/78.
|
||||
** ROLLED version
|
||||
*/
|
||||
static REAL ddot_r(int n,REAL *dx,int incx,REAL *dy,int incy)
|
||||
|
||||
{
|
||||
REAL dtemp;
|
||||
int i,ix,iy;
|
||||
|
||||
dtemp = ZERO;
|
||||
|
||||
if (n <= 0)
|
||||
return(ZERO);
|
||||
|
||||
if (incx != 1 || incy != 1)
|
||||
{
|
||||
|
||||
/* code for unequal increments or equal increments != 1 */
|
||||
|
||||
ix = 0;
|
||||
iy = 0;
|
||||
if (incx < 0) ix = (-n+1)*incx;
|
||||
if (incy < 0) iy = (-n+1)*incy;
|
||||
for (i = 0;i < n; i++)
|
||||
{
|
||||
dtemp = dtemp + dx[ix]*dy[iy];
|
||||
ix = ix + incx;
|
||||
iy = iy + incy;
|
||||
}
|
||||
return(dtemp);
|
||||
}
|
||||
|
||||
/* code for both increments equal to 1 */
|
||||
|
||||
for (i=0;i < n; i++)
|
||||
dtemp = dtemp + dx[i]*dy[i];
|
||||
return(dtemp);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Scales a vector by a constant.
|
||||
** Jack Dongarra, linpack, 3/11/78.
|
||||
** ROLLED version
|
||||
*/
|
||||
static void dscal_r(int n,REAL da,REAL *dx,int incx)
|
||||
|
||||
{
|
||||
int i,nincx;
|
||||
|
||||
if (n <= 0)
|
||||
return;
|
||||
if (incx != 1)
|
||||
{
|
||||
|
||||
/* code for increment not equal to 1 */
|
||||
|
||||
nincx = n*incx;
|
||||
for (i = 0; i < nincx; i = i + incx)
|
||||
dx[i] = da*dx[i];
|
||||
return;
|
||||
}
|
||||
|
||||
/* code for increment equal to 1 */
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
dx[i] = da*dx[i];
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** constant times a vector plus a vector.
|
||||
** Jack Dongarra, linpack, 3/11/78.
|
||||
** UNROLLED version
|
||||
*/
|
||||
static void daxpy_ur(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)
|
||||
|
||||
{
|
||||
int i,ix,iy,m;
|
||||
|
||||
if (n <= 0)
|
||||
return;
|
||||
if (da == ZERO)
|
||||
return;
|
||||
|
||||
if (incx != 1 || incy != 1)
|
||||
{
|
||||
|
||||
/* code for unequal increments or equal increments != 1 */
|
||||
|
||||
ix = 1;
|
||||
iy = 1;
|
||||
if(incx < 0) ix = (-n+1)*incx + 1;
|
||||
if(incy < 0)iy = (-n+1)*incy + 1;
|
||||
for (i = 0;i < n; i++)
|
||||
{
|
||||
dy[iy] = dy[iy] + da*dx[ix];
|
||||
ix = ix + incx;
|
||||
iy = iy + incy;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* code for both increments equal to 1 */
|
||||
|
||||
m = n % 4;
|
||||
if ( m != 0)
|
||||
{
|
||||
for (i = 0; i < m; i++)
|
||||
dy[i] = dy[i] + da*dx[i];
|
||||
if (n < 4)
|
||||
return;
|
||||
}
|
||||
for (i = m; i < n; i = i + 4)
|
||||
{
|
||||
dy[i] = dy[i] + da*dx[i];
|
||||
dy[i+1] = dy[i+1] + da*dx[i+1];
|
||||
dy[i+2] = dy[i+2] + da*dx[i+2];
|
||||
dy[i+3] = dy[i+3] + da*dx[i+3];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Forms the dot product of two vectors.
|
||||
** Jack Dongarra, linpack, 3/11/78.
|
||||
** UNROLLED version
|
||||
*/
|
||||
static REAL ddot_ur(int n,REAL *dx,int incx,REAL *dy,int incy)
|
||||
|
||||
{
|
||||
REAL dtemp;
|
||||
int i,ix,iy,m;
|
||||
|
||||
dtemp = ZERO;
|
||||
|
||||
if (n <= 0)
|
||||
return(ZERO);
|
||||
|
||||
if (incx != 1 || incy != 1)
|
||||
{
|
||||
|
||||
/* code for unequal increments or equal increments != 1 */
|
||||
|
||||
ix = 0;
|
||||
iy = 0;
|
||||
if (incx < 0) ix = (-n+1)*incx;
|
||||
if (incy < 0) iy = (-n+1)*incy;
|
||||
for (i = 0;i < n; i++)
|
||||
{
|
||||
dtemp = dtemp + dx[ix]*dy[iy];
|
||||
ix = ix + incx;
|
||||
iy = iy + incy;
|
||||
}
|
||||
return(dtemp);
|
||||
}
|
||||
|
||||
/* code for both increments equal to 1 */
|
||||
|
||||
m = n % 5;
|
||||
if (m != 0)
|
||||
{
|
||||
for (i = 0; i < m; i++)
|
||||
dtemp = dtemp + dx[i]*dy[i];
|
||||
if (n < 5)
|
||||
return(dtemp);
|
||||
}
|
||||
for (i = m; i < n; i = i + 5)
|
||||
{
|
||||
dtemp = dtemp + dx[i]*dy[i] +
|
||||
dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] +
|
||||
dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4];
|
||||
}
|
||||
return(dtemp);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Scales a vector by a constant.
|
||||
** Jack Dongarra, linpack, 3/11/78.
|
||||
** UNROLLED version
|
||||
*/
|
||||
static void dscal_ur(int n,REAL da,REAL *dx,int incx)
|
||||
|
||||
{
|
||||
int i,m,nincx;
|
||||
|
||||
if (n <= 0)
|
||||
return;
|
||||
if (incx != 1)
|
||||
{
|
||||
|
||||
/* code for increment not equal to 1 */
|
||||
|
||||
nincx = n*incx;
|
||||
for (i = 0; i < nincx; i = i + incx)
|
||||
dx[i] = da*dx[i];
|
||||
return;
|
||||
}
|
||||
|
||||
/* code for increment equal to 1 */
|
||||
|
||||
m = n % 5;
|
||||
if (m != 0)
|
||||
{
|
||||
for (i = 0; i < m; i++)
|
||||
dx[i] = da*dx[i];
|
||||
if (n < 5)
|
||||
return;
|
||||
}
|
||||
for (i = m; i < n; i = i + 5)
|
||||
{
|
||||
dx[i] = da*dx[i];
|
||||
dx[i+1] = da*dx[i+1];
|
||||
dx[i+2] = da*dx[i+2];
|
||||
dx[i+3] = da*dx[i+3];
|
||||
dx[i+4] = da*dx[i+4];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Finds the index of element having max. absolute value.
|
||||
** Jack Dongarra, linpack, 3/11/78.
|
||||
*/
|
||||
static int idamax(int n,REAL *dx,int incx)
|
||||
|
||||
{
|
||||
REAL dmax;
|
||||
int i, ix, itemp;
|
||||
|
||||
if (n < 1)
|
||||
return(-1);
|
||||
if (n ==1 )
|
||||
return(0);
|
||||
if(incx != 1)
|
||||
{
|
||||
|
||||
/* code for increment not equal to 1 */
|
||||
|
||||
ix = 1;
|
||||
dmax = fabs((double)dx[0]);
|
||||
ix = ix + incx;
|
||||
for (i = 1; i < n; i++)
|
||||
{
|
||||
if(fabs((double)dx[ix]) > dmax)
|
||||
{
|
||||
itemp = i;
|
||||
dmax = fabs((double)dx[ix]);
|
||||
}
|
||||
ix = ix + incx;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
/* code for increment equal to 1 */
|
||||
|
||||
itemp = 0;
|
||||
dmax = fabs((double)dx[0]);
|
||||
for (i = 1; i < n; i++)
|
||||
if(fabs((double)dx[i]) > dmax)
|
||||
{
|
||||
itemp = i;
|
||||
dmax = fabs((double)dx[i]);
|
||||
}
|
||||
}
|
||||
return (itemp);
|
||||
}
|
||||
|
||||
|
||||
static REAL second(void)
|
||||
|
||||
{
|
||||
return ((REAL)((REAL)clock()/(REAL)CLOCKS_PER_SEC));
|
||||
}
|
||||
|
||||
|
Loading…
Reference in a new issue