Skip to content

Commit 8381483

Browse files
author
Jean Luc Bouchot
committed
Merging current main in genlib
2 parents 2013379 + f4cbe5b commit 8381483

635 files changed

Lines changed: 64054 additions & 101516 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/julia/generate_binaries.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,15 @@ for (platform, libdir, ext) in platforms
4949

5050
# Create a folder with the version number of the package
5151
mkdir("$(package)_binaries.$version2")
52-
for folder in ("lib", )
52+
for folder in ("lib", "modules")
5353
cp(folder, "$(package)_binaries.$version2/$folder")
5454
end
5555

5656
cd("$(package)_binaries.$version2")
5757
if ext == "dll"
58-
run(`zip -r --symlinks ../../../$(package)_binaries.$version2.$platform.zip lib`)
58+
run(`zip -r --symlinks ../../../$(package)_binaries.$version2.$platform.zip lib modules`)
5959
else
60-
run(`tar -czf ../../../$(package)_binaries.$version2.$platform.tar.gz lib`)
60+
run(`tar -czf ../../../$(package)_binaries.$version2.$platform.tar.gz lib modules`)
6161
end
6262
cd("../../..")
6363

BLAS/Makefile

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,18 @@
44
# Compilers and flags
55
FC = gfortran
66
CC = gcc
7-
FFLAGS = -O2 -fPIC -ffree-line-length-none -Wuninitialized -Wmaybe-uninitialized -Iinclude
8-
FFLAGS_F77 = -O2 -fPIC -ffixed-line-length-none -Wuninitialized -Wmaybe-uninitialized -Iinclude
7+
# Ensure .mod files are written to (and read from) build/
8+
# Defaults: gfortran -> -J, ifort/ifx -> -module. You can still override MODFLAG on the make command line.
9+
MODDIR = $(BUILD_DIR)
10+
ifeq ($(findstring ifort,$(FC)),ifort)
11+
MODFLAG ?= -module $(MODDIR)
12+
else ifeq ($(findstring ifx,$(FC)),ifx)
13+
MODFLAG ?= -module $(MODDIR)
14+
else
15+
MODFLAG ?= -J$(MODDIR)
16+
endif
17+
FFLAGS = -O2 -fPIC -ffree-line-length-none -Wuninitialized -Wmaybe-uninitialized -Iinclude -I$(MODDIR) $(MODFLAG)
18+
FFLAGS_F77 = -O2 -fPIC -ffixed-line-length-none -Wuninitialized -Wmaybe-uninitialized -Iinclude -I$(MODDIR)
919
CFLAGS = -O2 -fPIC
1020

1121
# Directory structure
@@ -63,7 +73,8 @@ else
6373
BLAS_LIB ?= -lrefblas
6474
endif
6575

66-
# Optional: DIFFSIZES_access when using ISIZE globals (.f or .f90+wrappers when many vars)
76+
# Optional: DIFFSIZES_access when using ISIZE globals (run_tapenade_blas.py writes .f or .f90+wrappers)
77+
# When the ISIZE variables are too numerous to fit within the F77 COMMON line limit, the generator writes DIFFSIZES_access.f90 + wrappers instead of the .f file
6778
# Prefer .f90 when present (may have more vars than stale .f)
6879
# Must be defined before any rule that uses it as a prerequisite, so "make forward" (etc.) builds it first.
6980
ifneq ($(wildcard $(SRC_DIR)/DIFFSIZES_access.f90),)
@@ -169,17 +180,22 @@ $(BUILD_DIR)/%_dep2.o: $(SRC_DIR)/%_dep2.f
169180
$(FC) $(FFLAGS_F77) -c $< -o $@
170181

171182
# DIFFSIZES_access - F77 .f or F90 .f90 (generator picks based on COMMON line length)
172-
# When .f90 exists: compile to produce .o and .mod; wrappers need .mod (depend on it explicitly)
183+
# When .f90 exists: compile to produce .o and .mod; wrappers depend on .mod explicitly (avoids stale .o from .f)
173184
$(BUILD_DIR)/diffsizes_access.mod: $(SRC_DIR)/DIFFSIZES_access.f90
174-
$(FC) $(FFLAGS) -J$(BUILD_DIR) -c $< -o $(BUILD_DIR)/DIFFSIZES_access.o
185+
@mkdir -p $(BUILD_DIR)
186+
$(FC) $(FFLAGS) -c $< -o $(BUILD_DIR)/DIFFSIZES_access.o
175187

188+
# When .f90 exists: DIFFSIZES_access.o is produced as a byproduct of the diffsizes_access.mod rule (do not compile the .f)
189+
ifeq ($(wildcard $(SRC_DIR)/DIFFSIZES_access.f90),)
176190
$(BUILD_DIR)/DIFFSIZES_access.o: $(SRC_DIR)/DIFFSIZES_access.f
177191
$(FC) $(FFLAGS_F77) -c $< -o $@
192+
else
193+
$(BUILD_DIR)/DIFFSIZES_access.o: $(BUILD_DIR)/diffsizes_access.mod
194+
endif
178195

179196
# DIFFSIZES_access_wrappers.f - external symbols for F90 module (set_*, get_*, check_*)
180-
# Depend on .mod so we always build from .f90 when using F90 path (avoids stale .o from .f)
181197
$(BUILD_DIR)/DIFFSIZES_access_wrappers.o: $(SRC_DIR)/DIFFSIZES_access_wrappers.f $(BUILD_DIR)/diffsizes_access.mod
182-
$(FC) $(FFLAGS) -J$(BUILD_DIR) -c $(SRC_DIR)/DIFFSIZES_access_wrappers.f -o $@
198+
$(FC) $(FFLAGS) -c $(SRC_DIR)/DIFFSIZES_access_wrappers.f -o $@
183199

184200
# DIFFSIZES handling (supports both Fortran 90 module and Fortran 77 include)
185201
# For F90: DIFFSIZES.f90 is compiled to produce DIFFSIZES.o and DIFFSIZES.mod
@@ -292,31 +308,31 @@ $(BUILD_DIR)/libdiffblas_d.a: compile-d $(DIFFSIZES_ACCESS_OBJ)
292308
@echo "Created libdiffblas_d.a with $$(ls $(BUILD_DIR)/*_d.o 2>/dev/null | wc -w) objects"
293309

294310
$(BUILD_DIR)/libdiffblas_d.so: compile-d
295-
@$(FC) -shared -o $@ $$(ls $(BUILD_DIR)/*_d.o 2>/dev/null)
311+
@objs="$$(ls $(BUILD_DIR)/*_d.o 2>/dev/null)"; if [ -n "$$objs" ]; then $(FC) -shared -o $@ $$objs; else touch $@; fi
296312

297313
# Single library for all reverse mode differentiated code
298314
$(BUILD_DIR)/libdiffblas_b.a: compile-b $(DIFFSIZES_ACCESS_OBJ)
299315
@ar rcs $@ $$(ls $(BUILD_DIR)/*_b.o 2>/dev/null) $(BUILD_DIR)/adStack.o $(DIFFSIZES_ACCESS_OBJ)
300316
@echo "Created libdiffblas_b.a with $$(ls $(BUILD_DIR)/*_b.o 2>/dev/null | wc -w) objects"
301317

302318
$(BUILD_DIR)/libdiffblas_b.so: compile-b $(DIFFSIZES_ACCESS_OBJ)
303-
@$(FC) -shared -o $@ $$(ls $(BUILD_DIR)/*_b.o 2>/dev/null) $(BUILD_DIR)/adStack.o $(DIFFSIZES_ACCESS_OBJ)
319+
@objs="$$(ls $(BUILD_DIR)/*_b.o 2>/dev/null)"; if [ -n "$$objs" ]; then $(FC) -shared -o $@ $$objs $(BUILD_DIR)/adStack.o $(DIFFSIZES_ACCESS_OBJ); else touch $@; fi
304320

305321
# Single library for all vector forward mode differentiated code
306322
$(BUILD_DIR)/libdiffblas_dv.a: compile-dv $(DIFFSIZES_ACCESS_OBJ)
307323
@ar rcs $@ $$(ls $(BUILD_DIR)/*_dv.o 2>/dev/null) $(BUILD_DIR)/DIFFSIZES.o $(DIFFSIZES_ACCESS_OBJ)
308324
@echo "Created libdiffblas_dv.a with $$(ls $(BUILD_DIR)/*_dv.o 2>/dev/null | wc -w) objects"
309325

310326
$(BUILD_DIR)/libdiffblas_dv.so: compile-dv
311-
@$(FC) -shared -o $@ $$(ls $(BUILD_DIR)/*_dv.o 2>/dev/null) $(BUILD_DIR)/DIFFSIZES.o
327+
@objs="$$(ls $(BUILD_DIR)/*_dv.o 2>/dev/null)"; if [ -n "$$objs" ]; then $(FC) -shared -o $@ $$objs $(BUILD_DIR)/DIFFSIZES.o; else touch $@; fi
312328

313329
# Single library for all vector reverse mode differentiated code
314330
$(BUILD_DIR)/libdiffblas_bv.a: compile-bv $(DIFFSIZES_ACCESS_OBJ)
315331
@ar rcs $@ $$(ls $(BUILD_DIR)/*_bv.o 2>/dev/null) $(BUILD_DIR)/adStack.o $(BUILD_DIR)/DIFFSIZES.o $(DIFFSIZES_ACCESS_OBJ)
316332
@echo "Created libdiffblas_bv.a with $$(ls $(BUILD_DIR)/*_bv.o 2>/dev/null | wc -w) objects"
317333

318334
$(BUILD_DIR)/libdiffblas_bv.so: compile-bv $(DIFFSIZES_ACCESS_OBJ)
319-
@$(FC) -shared -o $@ $$(ls $(BUILD_DIR)/*_bv.o 2>/dev/null) $(BUILD_DIR)/adStack.o $(BUILD_DIR)/DIFFSIZES.o $(DIFFSIZES_ACCESS_OBJ)
335+
@objs="$$(ls $(BUILD_DIR)/*_bv.o 2>/dev/null)"; if [ -n "$$objs" ]; then $(FC) -shared -o $@ $$objs $(BUILD_DIR)/adStack.o $(BUILD_DIR)/DIFFSIZES.o $(DIFFSIZES_ACCESS_OBJ); else touch $@; fi
320336

321337
# Note: Original BLAS functions come from $(BLAS_LIB) (librefblas in LAPACKDIR)
322338
# No need to build a separate liborigblas
@@ -366,6 +382,7 @@ $(BUILD_DIR)/test_%_vector_reverse.o: $(TEST_DIR)/test_%_vector_reverse.f90 $(BU
366382
clean:
367383
@echo "Cleaning build directory..."
368384
rm -rf $(BUILD_DIR)
385+
rm -f *.mod
369386
@echo "Clean complete."
370387

371388
# Rebuild everything

BLAS/docs/TOLERANCES.md

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Differentiation test tolerances
2+
3+
Tolerances and step sizes for finite-difference derivative checks in the BLAS differentiation test generator.
4+
5+
---
6+
7+
## Defaults
8+
9+
### rtol/atol by precision family
10+
11+
| Family | Meaning | rtol | atol |
12+
|--------|---------|------|------|
13+
| S | `S*` (single real) | 2.0e-3 | 2.0e-3 |
14+
| C | `C*` (single complex) | 1.0e-3 | 1.0e-3 |
15+
| D | `D*` (double real) | 1.0e-5 | 1.0e-5 |
16+
| Z | `Z*` (double complex) | 1.0e-5 | 1.0e-5 |
17+
18+
### step size h by precision family
19+
20+
| Family | h |
21+
|--------|---|
22+
| S, C | 1.0e-3 |
23+
| D, Z | 1.0e-7 |
24+
25+
---
26+
27+
## Overrides
28+
29+
### Mixed-precision D* (single-precision first differentiable input)
30+
31+
Applies when a `D*` routine produces double-precision output but its first differentiable input is single precision (e.g. `DSDOT` with **SX** first; the generator also treats **SY** and **SB** as single-precision inputs for `D*`).
32+
33+
- **Scalar forward**: override **h = 1.0e-3** (rtol/atol remain at the `D*` base value of 1.0e-5)
34+
- **Scalar reverse / vector forward / vector reverse**: override **h = 1.0e-3**, **rtol = atol = 2.0e-3**
35+
36+
### Relaxed C* tolerance in vector reverse
37+
38+
Only for **single-precision complex** (`C*`) **vector reverse** tests:
39+
40+
| Routine family (examples) | rtol/atol |
41+
|---------------------------|-----------|
42+
| DOT (e.g. `CDOTC`) | 2.5e-2 |
43+
| BLAS3 (e.g. `CGEMM`, `CSYMM`, `CHEMM`) | 1.0e-2 |
44+
| BLAS2 banded MV (e.g. `CGBMV`, `CTBMV`, `CHBMV`) | 1.0e-2 |
45+
46+
All other `C*` modes use the base tolerance (1.0e-3). `Z*` does not use relaxed tolerances.

BLAS/include/DIFFSIZES.f90

Lines changed: 162 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,121 @@ MODULE DIFFSIZES
22
IMPLICIT NONE
33
INTEGER, PARAMETER :: nbdirsmax = 4
44
! ISIZE* are module variables (set via set_ISIZE*(), read via get_ISIZE*(), or used directly after calling check_ISIZE*_initialized())
5-
INTEGER, SAVE :: isize1ofx = -1, isize1ofy = -1, isize2ofa = -1
5+
INTEGER, SAVE :: isize1ofap = -1, isize1ofcx = -1, isize1ofcy = -1, isize1ofdx = -1, isize1ofdy = -1, isize1ofsx = -1, &
6+
& isize1ofsy = -1, isize1ofx = -1, isize1ofy = -1, isize1ofzx = -1, isize1ofzy = -1, isize2ofa = -1, isize2ofb = -1
67
CONTAINS
8+
SUBROUTINE set_ISIZE1OFAp(val)
9+
INTEGER, INTENT(IN) :: val
10+
isize1ofap = val
11+
END SUBROUTINE set_ISIZE1OFAp
12+
13+
INTEGER FUNCTION get_ISIZE1OFAp()
14+
get_ISIZE1OFAp = isize1ofap
15+
END FUNCTION get_ISIZE1OFAp
16+
17+
SUBROUTINE check_ISIZE1OFAp_initialized()
18+
IF (isize1ofap < 0) THEN
19+
WRITE(*,'(A)') 'Error: isize1ofap not set. Call set_ISIZE1OFAp before differentiated routine.'
20+
STOP 1
21+
END IF
22+
END SUBROUTINE check_ISIZE1OFAp_initialized
23+
24+
SUBROUTINE set_ISIZE1OFCx(val)
25+
INTEGER, INTENT(IN) :: val
26+
isize1ofcx = val
27+
END SUBROUTINE set_ISIZE1OFCx
28+
29+
INTEGER FUNCTION get_ISIZE1OFCx()
30+
get_ISIZE1OFCx = isize1ofcx
31+
END FUNCTION get_ISIZE1OFCx
32+
33+
SUBROUTINE check_ISIZE1OFCx_initialized()
34+
IF (isize1ofcx < 0) THEN
35+
WRITE(*,'(A)') 'Error: isize1ofcx not set. Call set_ISIZE1OFCx before differentiated routine.'
36+
STOP 1
37+
END IF
38+
END SUBROUTINE check_ISIZE1OFCx_initialized
39+
40+
SUBROUTINE set_ISIZE1OFCy(val)
41+
INTEGER, INTENT(IN) :: val
42+
isize1ofcy = val
43+
END SUBROUTINE set_ISIZE1OFCy
44+
45+
INTEGER FUNCTION get_ISIZE1OFCy()
46+
get_ISIZE1OFCy = isize1ofcy
47+
END FUNCTION get_ISIZE1OFCy
48+
49+
SUBROUTINE check_ISIZE1OFCy_initialized()
50+
IF (isize1ofcy < 0) THEN
51+
WRITE(*,'(A)') 'Error: isize1ofcy not set. Call set_ISIZE1OFCy before differentiated routine.'
52+
STOP 1
53+
END IF
54+
END SUBROUTINE check_ISIZE1OFCy_initialized
55+
56+
SUBROUTINE set_ISIZE1OFDx(val)
57+
INTEGER, INTENT(IN) :: val
58+
isize1ofdx = val
59+
END SUBROUTINE set_ISIZE1OFDx
60+
61+
INTEGER FUNCTION get_ISIZE1OFDx()
62+
get_ISIZE1OFDx = isize1ofdx
63+
END FUNCTION get_ISIZE1OFDx
64+
65+
SUBROUTINE check_ISIZE1OFDx_initialized()
66+
IF (isize1ofdx < 0) THEN
67+
WRITE(*,'(A)') 'Error: isize1ofdx not set. Call set_ISIZE1OFDx before differentiated routine.'
68+
STOP 1
69+
END IF
70+
END SUBROUTINE check_ISIZE1OFDx_initialized
71+
72+
SUBROUTINE set_ISIZE1OFDy(val)
73+
INTEGER, INTENT(IN) :: val
74+
isize1ofdy = val
75+
END SUBROUTINE set_ISIZE1OFDy
76+
77+
INTEGER FUNCTION get_ISIZE1OFDy()
78+
get_ISIZE1OFDy = isize1ofdy
79+
END FUNCTION get_ISIZE1OFDy
80+
81+
SUBROUTINE check_ISIZE1OFDy_initialized()
82+
IF (isize1ofdy < 0) THEN
83+
WRITE(*,'(A)') 'Error: isize1ofdy not set. Call set_ISIZE1OFDy before differentiated routine.'
84+
STOP 1
85+
END IF
86+
END SUBROUTINE check_ISIZE1OFDy_initialized
87+
88+
SUBROUTINE set_ISIZE1OFSx(val)
89+
INTEGER, INTENT(IN) :: val
90+
isize1ofsx = val
91+
END SUBROUTINE set_ISIZE1OFSx
92+
93+
INTEGER FUNCTION get_ISIZE1OFSx()
94+
get_ISIZE1OFSx = isize1ofsx
95+
END FUNCTION get_ISIZE1OFSx
96+
97+
SUBROUTINE check_ISIZE1OFSx_initialized()
98+
IF (isize1ofsx < 0) THEN
99+
WRITE(*,'(A)') 'Error: isize1ofsx not set. Call set_ISIZE1OFSx before differentiated routine.'
100+
STOP 1
101+
END IF
102+
END SUBROUTINE check_ISIZE1OFSx_initialized
103+
104+
SUBROUTINE set_ISIZE1OFSy(val)
105+
INTEGER, INTENT(IN) :: val
106+
isize1ofsy = val
107+
END SUBROUTINE set_ISIZE1OFSy
108+
109+
INTEGER FUNCTION get_ISIZE1OFSy()
110+
get_ISIZE1OFSy = isize1ofsy
111+
END FUNCTION get_ISIZE1OFSy
112+
113+
SUBROUTINE check_ISIZE1OFSy_initialized()
114+
IF (isize1ofsy < 0) THEN
115+
WRITE(*,'(A)') 'Error: isize1ofsy not set. Call set_ISIZE1OFSy before differentiated routine.'
116+
STOP 1
117+
END IF
118+
END SUBROUTINE check_ISIZE1OFSy_initialized
119+
7120
SUBROUTINE set_ISIZE1OFX(val)
8121
INTEGER, INTENT(IN) :: val
9122
isize1ofx = val
@@ -36,6 +149,38 @@ SUBROUTINE check_ISIZE1OFY_initialized()
36149
END IF
37150
END SUBROUTINE check_ISIZE1OFY_initialized
38151

152+
SUBROUTINE set_ISIZE1OFZx(val)
153+
INTEGER, INTENT(IN) :: val
154+
isize1ofzx = val
155+
END SUBROUTINE set_ISIZE1OFZx
156+
157+
INTEGER FUNCTION get_ISIZE1OFZx()
158+
get_ISIZE1OFZx = isize1ofzx
159+
END FUNCTION get_ISIZE1OFZx
160+
161+
SUBROUTINE check_ISIZE1OFZx_initialized()
162+
IF (isize1ofzx < 0) THEN
163+
WRITE(*,'(A)') 'Error: isize1ofzx not set. Call set_ISIZE1OFZx before differentiated routine.'
164+
STOP 1
165+
END IF
166+
END SUBROUTINE check_ISIZE1OFZx_initialized
167+
168+
SUBROUTINE set_ISIZE1OFZy(val)
169+
INTEGER, INTENT(IN) :: val
170+
isize1ofzy = val
171+
END SUBROUTINE set_ISIZE1OFZy
172+
173+
INTEGER FUNCTION get_ISIZE1OFZy()
174+
get_ISIZE1OFZy = isize1ofzy
175+
END FUNCTION get_ISIZE1OFZy
176+
177+
SUBROUTINE check_ISIZE1OFZy_initialized()
178+
IF (isize1ofzy < 0) THEN
179+
WRITE(*,'(A)') 'Error: isize1ofzy not set. Call set_ISIZE1OFZy before differentiated routine.'
180+
STOP 1
181+
END IF
182+
END SUBROUTINE check_ISIZE1OFZy_initialized
183+
39184
SUBROUTINE set_ISIZE2OFA(val)
40185
INTEGER, INTENT(IN) :: val
41186
isize2ofa = val
@@ -52,4 +197,20 @@ SUBROUTINE check_ISIZE2OFA_initialized()
52197
END IF
53198
END SUBROUTINE check_ISIZE2OFA_initialized
54199

200+
SUBROUTINE set_ISIZE2OFB(val)
201+
INTEGER, INTENT(IN) :: val
202+
isize2ofb = val
203+
END SUBROUTINE set_ISIZE2OFB
204+
205+
INTEGER FUNCTION get_ISIZE2OFB()
206+
get_ISIZE2OFB = isize2ofb
207+
END FUNCTION get_ISIZE2OFB
208+
209+
SUBROUTINE check_ISIZE2OFB_initialized()
210+
IF (isize2ofb < 0) THEN
211+
WRITE(*,'(A)') 'Error: isize2ofb not set. Call set_ISIZE2OFB before differentiated routine.'
212+
STOP 1
213+
END IF
214+
END SUBROUTINE check_ISIZE2OFB_initialized
215+
55216
END MODULE DIFFSIZES

0 commit comments

Comments
 (0)