Skip to content

Commit 5506f5f

Browse files
authored
Slightly more performance (#94)
* update if statement, unicode symbols * remove meaningless assignment * remove redefinition of Base functions * fix type instability, update symbols to unicode greek * update symbols to unicode greek * update symbols using Unicode greeks * move some bessel-roots related functions to besselroots.jl, and remove duplicates * remove unnecessary min function * add StaticArrays.jl for faster besselroots * add @inline for faster gausslegendre * add constants.jl to store all constants * fix compat table for StaticArrays.jl * bump version to v0.4.6
1 parent 0b3be86 commit 5506f5f

9 files changed

Lines changed: 322 additions & 254 deletions

Project.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
name = "FastGaussQuadrature"
22
uuid = "442a2c76-b920-505d-bb47-c5924d526838"
3-
version = "0.4.5"
3+
version = "0.4.6"
44

55
[deps]
66
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
77
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
8+
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
89

910
[compat]
1011
SpecialFunctions = "0.7, 0.8, 0.9, 0.10, 1.0"
12+
StaticArrays = "1.0"
1113
julia = "1"
1214

1315
[extras]

src/FastGaussQuadrature.jl

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,8 @@
11
module FastGaussQuadrature
22

3-
using SpecialFunctions, LinearAlgebra
4-
cumprod(A::AbstractArray) = Base.cumprod(A, dims=1)
5-
cumprod(A::AbstractArray, d::Int) = Base.cumprod(A, dims=d)
6-
sum(A::AbstractArray, n::Int) = Base.sum(A, dims=n)
7-
sum(A) = Base.sum(A)
8-
flipdim(A, d) = reverse(A, dims=d)
9-
3+
using LinearAlgebra
4+
using SpecialFunctions
5+
using StaticArrays
106

117
export gausslegendre
128
export gausschebyshev
@@ -19,6 +15,7 @@ export besselroots
1915

2016
import SpecialFunctions: besselj, airyai, airyaiprime
2117

18+
include("constants.jl")
2219
include("gausslegendre.jl")
2320
include("gausschebyshev.jl")
2421
include("gausslaguerre.jl")

src/besselroots.jl

Lines changed: 135 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -42,22 +42,22 @@ function besselroots(ν::Float64, n::Integer)
4242
end
4343

4444
x = zeros(n)
45-
if n > 0 && ν == 0
45+
if ν == 0
4646
for k in 1:min(n,20)
4747
x[k] = J0_roots[k]
4848
end
4949
for k in min(n,20)+1:n
5050
x[k] = McMahon(ν, k)
5151
end
52-
elseif n > 0 && ν ≥ -1 && ν ≤ 5
53-
correctFirstFew = Piessens( ν )
52+
elseif -1 ≤ ν ≤ 5
53+
correctFirstFew = Piessens(ν)
5454
for k in 1:min(n,6)
5555
x[k] = correctFirstFew[k]
5656
end
5757
for k in min(n,6)+1:n
5858
x[k] = McMahon(ν, k)
5959
end
60-
elseif ν > 5
60+
elseif 5 < ν
6161
for k in 1:n
6262
x[k] = McMahon(ν, k)
6363
end
@@ -74,91 +74,148 @@ function McMahon(ν::Float64, k::Integer)
7474
# McMahon's expansion. This expansion gives very accurate approximation
7575
# for the sth zero (s ≥ 7) in the whole region ν ≥ -1, and moderate
7676
# approximation in other cases.
77-
mu = 4ν^2
77+
μ = 4ν^2
7878
a1 = 1 / 8
79-
a3 = (7mu-31) / 384
80-
a5 = 4*(3779+mu*(-982+83mu)) / 61440 # Evaluate via Horner's method.
81-
a7 = 6*(-6277237+mu*(1585743+mu*(-153855+6949mu))) / 20643840
82-
a9 = 144*(2092163573+mu*(-512062548+mu*(48010494+mu*(-2479316+70197mu)))) / 11890851840
83-
a11 = 720 *(-8249725736393+mu*(1982611456181+mu*(-179289628602+mu*(8903961290 +
84-
mu*(-287149133 + 5592657mu))))) / 10463949619200
85-
a13 = 576 *(423748443625564327 + mu*(-100847472093088506 + mu*(8929489333108377 +
86-
mu*(-426353946885548+mu*(13172003634537+mu*(-291245357370 + 4148944183mu)))))) / 13059009124761600
79+
a3 = (7μ-31) / 384
80+
a5 = 4*(3779+μ*(-982+83μ)) / 61440 # Evaluate via Horner's method.
81+
a7 = 6*(-6277237+μ*(1585743+μ*(-153855+6949μ))) / 20643840
82+
a9 = 144*(2092163573+μ*(-512062548+μ*(48010494+μ*(-2479316+70197μ)))) / 11890851840
83+
a11 = 720 *(-8249725736393+μ*(1982611456181+μ*(-179289628602+μ*(8903961290 +
84+
μ*(-287149133 + 5592657μ))))) / 10463949619200
85+
a13 = 576 *(423748443625564327 + μ*(-100847472093088506 + μ*(8929489333108377 +
86+
μ*(-426353946885548+μ*(13172003634537+μ*(-291245357370 + 4148944183μ)))))) / 13059009124761600
8787
b = 0.25 * (2ν+4k-1)*π
8888
# Evaluate using Horner's scheme:
89-
x = b - (mu-1)*( ((((((a13/b^2 + a11)/b^2 + a9)/b^2 + a7)/b^2 + a5)/b^2 + a3)/b^2 + a1)/b)
89+
x = b - (μ-1)*( ((((((a13/b^2 + a11)/b^2 + a9)/b^2 + a7)/b^2 + a5)/b^2 + a3)/b^2 + a1)/b)
9090
return x
9191
end
9292

93-
# Roots of Bessel function ``J_0`` in Float64.
94-
# https://mathworld.wolfram.com/BesselFunctionZeros.html
95-
const J0_roots =
96-
[ 2.4048255576957728
97-
5.5200781102863106
98-
8.6537279129110122
99-
11.791534439014281
100-
14.930917708487785
101-
18.071063967910922
102-
21.211636629879258
103-
24.352471530749302
104-
27.493479132040254
105-
30.634606468431975
106-
33.775820213573568
107-
36.917098353664044
108-
40.058425764628239
109-
43.199791713176730
110-
46.341188371661814
111-
49.482609897397817
112-
52.624051841114996
113-
55.765510755019979
114-
58.906983926080942
115-
62.048469190227170 ]
116-
117-
118-
const Piessens_C = [
119-
2.883975316228 8.263194332307 11.493871452173 14.689036505931 17.866882871378 21.034784308088
120-
0.767665211539 4.209200330779 4.317988625384 4.387437455306 4.435717974422 4.471319438161
121-
-0.086538804759 -0.164644722483 -0.130667664397 -0.109469595763 -0.094492317231 -0.083234240394
122-
0.020433979038 0.039764618826 0.023009510531 0.015359574754 0.011070071951 0.008388073020
123-
-0.006103761347 -0.011799527177 -0.004987164201 -0.002655024938 -0.001598668225 -0.001042443435
124-
0.002046841322 0.003893555229 0.001204453026 0.000511852711 0.000257620149 0.000144611721
125-
-0.000734476579 -0.001369989689 -0.000310786051 -0.000105522473 -0.000044416219 -0.000021469973
126-
0.000275336751 0.000503054700 0.000083834770 0.000022761626 0.000008016197 0.000003337753
127-
-0.000106375704 -0.000190381770 -0.000023343325 -0.000005071979 -0.000001495224 -0.000000536428
128-
0.000042003336 0.000073681222 0.000006655551 0.000001158094 0.000000285903 0.000000088402
129-
-0.000016858623 -0.000029010830 -0.000001932603 -0.000000269480 -0.000000055734 -0.000000014856
130-
0.000006852440 0.000011579131 0.000000569367 0.000000063657 0.000000011033 0.000000002536
131-
-0.000002813300 -0.000004672877 -0.000000169722 -0.000000015222 -0.000000002212 -0.000000000438
132-
0.000001164419 0.000001903082 0.000000051084 0.000000003677 0.000000000448 0.000000000077
133-
-0.000000485189 -0.000000781030 -0.000000015501 -0.000000000896 -0.000000000092 -0.000000000014
134-
0.000000203309 0.000000322648 0.000000004736 0.000000000220 0.000000000019 0.000000000002
135-
-0.000000085602 -0.000000134047 -0.000000001456 -0.000000000054 -0.000000000004 0
136-
0.000000036192 0.000000055969 0.000000000450 0.000000000013 0 0
137-
-0.000000015357 -0.000000023472 -0.000000000140 -0.000000000003 0 0
138-
0.000000006537 0.000000009882 0.000000000043 0.000000000001 0 0
139-
-0.000000002791 -0.000000004175 -0.000000000014 0 0 0
140-
0.000000001194 0.000000001770 0.000000000004 0 0 0
141-
-0.000000000512 -0.000000000752 0 0 0 0
142-
0.000000000220 0.000000000321 0 0 0 0
143-
-0.000000000095 -0.000000000137 0 0 0 0
144-
0.000000000041 0.000000000059 0 0 0 0
145-
-0.000000000018 -0.000000000025 0 0 0 0
146-
0.000000000008 0.000000000011 0 0 0 0
147-
-0.000000000003 -0.000000000005 0 0 0 0
148-
0.000000000001 0.000000000002 0 0 0 0]
93+
function _piessens_chebyshev30(ν::Float64)
94+
# Piessens's Chebyshev series approximations (1984). Calculates the 6 first
95+
# zeros to at least 12 decimal figures in region -1 ≤ ν ≤ 5:
96+
pt = (ν-2)/3
14997

98+
T1 = 1.0
99+
T2 = pt
100+
T3 = 2pt*T2 - T1
101+
T4 = 2pt*T3 - T2
102+
T5 = 2pt*T4 - T3
103+
T6 = 2pt*T5 - T4
104+
T7 = 2pt*T6 - T5
105+
T8 = 2pt*T7 - T6
106+
T9 = 2pt*T8 - T7
107+
T10 = 2pt*T9 - T8
108+
T11 = 2pt*T10 - T9
109+
T12 = 2pt*T11 - T10
110+
T13 = 2pt*T12 - T11
111+
T14 = 2pt*T13 - T12
112+
T15 = 2pt*T14 - T13
113+
T16 = 2pt*T15 - T14
114+
T17 = 2pt*T16 - T15
115+
T18 = 2pt*T17 - T16
116+
T19 = 2pt*T18 - T17
117+
T20 = 2pt*T19 - T18
118+
T21 = 2pt*T20 - T19
119+
T22 = 2pt*T21 - T20
120+
T23 = 2pt*T22 - T21
121+
T24 = 2pt*T23 - T22
122+
T25 = 2pt*T24 - T23
123+
T26 = 2pt*T25 - T24
124+
T27 = 2pt*T26 - T25
125+
T28 = 2pt*T27 - T26
126+
T29 = 2pt*T28 - T27
127+
T30 = 2pt*T29 - T28
128+
129+
T = SVector(T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15,T16,T17,T18,T19,T20,T21,T22,T23,T24,T25,T26,T27,T28,T29,T30)
130+
return T
131+
end
150132

151133
function Piessens(ν::Float64)
152134
# Piessens's Chebyshev series approximations (1984). Calculates the 6 first
153135
# zeros to at least 12 decimal figures in region -1 ≤ ν ≤ 5:
154136
C = Piessens_C
155-
T = Array{Float64}(undef,size(C,1))
156-
pt = (ν-2)/3
157-
T[1], T[2] = 1., pt
158-
for k = 2:size(C,1)-1
159-
T[k+1] = 2*pt*T[k] - T[k-1]
160-
end
137+
T = _piessens_chebyshev30(ν)
161138
y = C'*T
162-
y[1] *= sqrt(ν+1) # Scale the first root.
163-
return y
139+
_y = collect(y)
140+
_y[1] *= sqrt(ν+1) # Scale the first root.
141+
return _y
142+
end
143+
144+
145+
function besselZeroRoots(m)
146+
# BESSEL0ROOTS ROOTS OF BESSELJ(0,x). USE ASYMPTOTICS.
147+
# Use McMahon's expansion for the remainder (NIST, 10.21.19):
148+
jk = Array{Float64}(undef, m)
149+
p = (1071187749376 / 315, 0.0, -401743168 / 105, 0.0, 120928 / 15,
150+
0.0, -124 / 3, 0.0, 1.0, 0.0)
151+
# First 20 are precomputed:
152+
@inbounds for jj = 1:min(m, 20)
153+
jk[jj] = J0_roots[jj]
154+
end
155+
@inbounds for jj = 21:min(m, 47)
156+
ak = π * (jj - .25)
157+
ak82 = (.125 / ak)^2
158+
jk[jj] = ak + .125 / ak * @evalpoly(ak82, 1.0, p[7], p[5], p[3])
159+
end
160+
@inbounds for jj = 48:min(m, 344)
161+
ak = π * (jj - .25)
162+
ak82 = (.125 / ak)^2
163+
jk[jj] = ak + .125 / ak * @evalpoly(ak82, 1.0, p[7], p[5])
164+
end
165+
@inbounds for jj = 345:min(m,13191)
166+
ak = π * (jj - .25)
167+
ak82 = (.125 / ak)^2
168+
jk[jj] = ak + .125 / ak * muladd(ak82, p[7], 1.0)
169+
end
170+
@inbounds for jj = 13192:m
171+
ak = π * (jj - .25)
172+
jk[jj] = ak + .125 / ak
173+
end
174+
return jk
175+
end
176+
177+
function besselJ1(m)
178+
# BESSELJ1 EVALUATE BESSELJ(1,x)^2 AT ROOTS OF BESSELJ(0,x).
179+
# USE ASYMPTOTICS. Use Taylor series of (NIST, 10.17.3) and McMahon's
180+
# expansion (NIST, 10.21.19):
181+
Jk2 = Array{Float64}(undef, m)
182+
c = (-171497088497 / 15206400, 461797 / 1152, -172913 / 8064,
183+
151 / 80, -7 / 24, 0.0, 2.0)
184+
# First 10 are precomputed:
185+
@inbounds for jj = 1:min(m, 10)
186+
Jk2[jj] = besselJ1_10[jj]
187+
end
188+
@inbounds for jj = 11:min(m, 15)
189+
ak = π * (jj - .25)
190+
ak2 = (1 / ak)^2
191+
Jk2[jj] = 1 / (π * ak) * muladd(@evalpoly(ak2, c[5], c[4], c[3],
192+
c[2], c[1]), ak2^2, c[7])
193+
end
194+
@inbounds for jj = 16:min(m, 21)
195+
ak = π * (jj - .25)
196+
ak2 = (1 / ak)^2
197+
Jk2[jj] = 1 / (π * ak) * muladd(@evalpoly(ak2, c[5], c[4], c[3], c[2]),
198+
ak2^2, c[7])
199+
end
200+
@inbounds for jj = 22:min(m,55)
201+
ak = π * (jj - .25)
202+
ak2 = (1 / ak)^2
203+
Jk2[jj] = 1 / (π * ak) * muladd(@evalpoly(ak2, c[5], c[4], c[3]),
204+
ak2^2, c[7])
205+
end
206+
@inbounds for jj = 56:min(m,279)
207+
ak = π * (jj - .25)
208+
ak2 = (1 / ak)^2
209+
Jk2[jj] = 1 / (π * ak) * muladd(muladd(ak2, c[4], c[5]), ak2^2, c[7])
210+
end
211+
@inbounds for jj = 280:min(m,2279)
212+
ak = π * (jj - .25)
213+
ak2 = (1 / ak)^2
214+
Jk2[jj] = 1 / (π * ak) * muladd(ak2^2, c[5], c[7])
215+
end
216+
@inbounds for jj = 2280:m
217+
ak = π * (jj - .25)
218+
Jk2[jj] = 1 / (π * ak) * c[7]
219+
end
220+
return Jk2
164221
end

0 commit comments

Comments
 (0)