Implement boundary asymptotics (#131)

pbeckman · web-flow · commit bd8211317797 · 2023-12-09T12:19:16.000+09:00
* implement boundary asymptotics with only low order terms (tests will fail by a digit without higher order terms)

* use saved chebfun integration and differentiation matrices

* 15 digit accuracy thanks to besselTaylor

* move constants into constants.jl

* added test to cover barycentric interp, minor renaming of new functions

* remove unreached lines for testing coverage

* remove innerjacobi_rec which has been replaced by feval_asy2

* bump version to 1.0.1
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastGaussQuadrature"
 uuid = "442a2c76-b920-505d-bb47-c5924d526838"
-version = "1.0.0"
+version = "1.0.1"
 
 [deps]
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/src/constants.jl b/src/constants.jl
@@ -125,3 +125,38 @@ const AIRY_ROOTS = @SVector [
     -12.828776752865757,
     -13.69148903521072,
 ]
+
+@doc raw"""
+10-point Chebyshev type 2 integration matrix computed using Chebfun. 
+
+Used for numerical integration in boundary asymptotics for Gauss-Jacobi.
+"""
+const CUMSUMMAT_10 = [
+        0 0 0 0 0 0 0 0 0 0;
+        0.019080722834519 0.0496969890549313 -0.0150585059796021 0.0126377679164575 -0.0118760811432484 0.0115424841953298 -0.0113725236133433 0.0112812076497144 -0.011235316890839 0.00561063519017238;
+        0.000812345683614654 0.14586999854807 0.0976007154946748 -0.0146972757610091 0.00680984376276729 -0.00401953146146086 0.00271970678005437 -0.00205195604894289 0.00172405556686793 -0.000812345683614662;
+        0.017554012345679 0.103818185816131 0.249384588781868 0.149559082892416 -0.0321899366961563 0.0210262631520163 -0.0171075837742504 0.0153341224604243 -0.0145160806571407 0.00713734567901234;
+        0.00286927716087872 0.136593368810421 0.201074970443365 0.339479954969535 0.164397864607267 -0.0260484364615523 0.0127399306249393 -0.00815620454308202 0.00627037388217603 -0.00286927716087872;
+        0.0152149561732244 0.110297082689861 0.233440527881186 0.289200104648429 0.369910942265696 0.179464641196877 -0.0375399196961666 0.0242093528947391 -0.0200259122383839 0.00947640185146695;
+        0.00520833333333334 0.131083537229178 0.20995020087768 0.319047619047619 0.322836242652128 0.376052442500301 0.152380952380952 -0.024100265443764 0.0127492707559062 -0.00520833333333333;
+        0.0131580246959603 0.114843401005169 0.227336279387047 0.299220328493314 0.347882037265605 0.337052662041377 0.316637311034378 0.12768360784343 -0.0293025419760333 0.011533333328731;
+        0.00673504382217329 0.127802773462876 0.21400311568839 0.313312558886712 0.332320021608814 0.355738586947393 0.289302267356911 0.240342829317707 0.0668704675171058 -0.00673504382217329;
+        0.0123456790123457 0.116567456572037 0.225284323338104 0.301940035273369 0.343862505804144 0.343862505804144 0.301940035273369 0.225284323338104 0.116567456572037 0.0123456790123457
+    ]
+@doc raw"""
+10-point Chebyshev type 2 differentiation matrix computed using Chebfun.
+
+Used for numerical differentiation in boundary asymptotics for Gauss-Jacobi.
+"""
+const DIFFMAT_10 = [
+        -27.1666666666667 33.1634374775264 -8.54863217041303 4 -2.42027662546121 1.70408819104185 -1.33333333333333 1.13247433143179 -1.03109120412576 0.5;
+        -8.29085936938159 4.01654328417507 5.75877048314363 -2.27431608520652 1.30540728933228 -0.898197570222574 0.694592710667722 -0.586256827714545 0.532088886237956 -0.257772801031441;
+        2.13715804260326 -5.75877048314363 0.927019729872654 3.75877048314364 -1.68805925749197 1.06417777247591 -0.789861687269397 0.652703644666139 -0.586256827714545 0.283118582857949;
+        -1 2.27431608520652 -3.75877048314364 0.333333333333335 3.06417777247591 -1.48445439793712 1 -0.789861687269397 0.694592710667722 -0.333333333333333;
+        0.605069156365302 -1.30540728933228 1.68805925749197 -3.06417777247591 0.0895235543024196 2.87938524157182 -1.48445439793712 1.06417777247591 -0.898197570222574 0.426022047760462;
+        -0.426022047760462 0.898197570222574 -1.06417777247591 1.48445439793712 -2.87938524157182 -0.0895235543024196 3.06417777247591 -1.68805925749197 1.30540728933228 -0.605069156365302;
+        0.333333333333333 -0.694592710667722 0.789861687269397 -1 1.48445439793712 -3.06417777247591 -0.333333333333335 3.75877048314364 -2.27431608520652 1;
+        -0.283118582857949 0.586256827714545 -0.652703644666139 0.789861687269397 -1.06417777247591 1.68805925749197 -3.75877048314364 -0.927019729872654 5.75877048314363 -2.13715804260326;
+        0.257772801031441 -0.532088886237956 0.586256827714545 -0.694592710667722 0.898197570222574 -1.30540728933228 2.27431608520652 -5.75877048314363 -4.01654328417507 8.29085936938159;
+        -0.5 1.03109120412576 -1.13247433143179 1.33333333333333 -1.70408819104185 2.42027662546121 -4 8.54863217041303 -33.1634374775264 27.1666666666667
+    ]
diff --git a/src/gaussjacobi.jl b/src/gaussjacobi.jl
@@ -151,15 +151,6 @@ function innerjacobi_rec!(n, x, α::T, β::T, P, PP) where {T <: AbstractFloat}
     nothing
 end
 
-function innerjacobi_rec(n, x, α::T, β::T) where {T <: AbstractFloat}
-    # EVALUATE JACOBI POLYNOMIALS AND ITS DERIVATIVE USING THREE-TERM RECURRENCE.
-    N = length(x)
-    P = Array{T}(undef,N)
-    PP = Array{T}(undef,N)
-    innerjacobi_rec!(n, x, α, β, P, PP)
-    return P, PP
-end
-
 function weightsConstant(n, α, β)
     # Compute the constant for weights:
     M = min(20, n - 1)
@@ -185,12 +176,12 @@ function jacobi_asy(n::Integer, α::Float64, β::Float64)
     x, w = asy1(n, α, β, nbdy)
 
     # Boundary formula (right):
-    xbdy = boundary(n, α, β, nbdy)
+    xbdy = asy2(n, α, β, nbdy)
     x[bdyidx1], w[bdyidx1] = xbdy
 
     # Boundary formula (left):
     if α ≠ β
-        xbdy = boundary(n, β, α, nbdy)
+        xbdy = asy2(n, β, α, nbdy)
     end
     x[bdyidx2] = -xbdy[1]
     w[bdyidx2] = xbdy[2]
@@ -264,9 +255,6 @@ function feval_asy1(n::Integer, α::Float64, β::Float64, t::AbstractVector, idx
     # Number of elements in t:
     N = length(t)
 
-    # Some often used vectors/matrices:
-    onesM = ones(M)
-
     # The sine and cosine terms:
     A = repeat((2n+α+β).+(1:M),1,N).*repeat(t',M)/2 .- (α+1/2)*π/2  # M × N matrix
     cosA = cos.(A)
@@ -350,10 +338,10 @@ function feval_asy1(n::Integer, α::Float64, β::Float64, t::AbstractVector, idx
     g = [1, 1/12, 1/288, -139/51840, -571/2488320, 163879/209018880,
          5246819/75246796800, -534703531/902961561600,
          -4483131259/86684309913600, 432261921612371/514904800886784000]
-    f(g,z) = dot(g, [1;cumprod(ones(9)./z)])
+    f(z) = dot(g, [1;cumprod(ones(9)./z)])
 
     # Float constant C, C2
-    C = 2*p2*(f(g,n+α)*f(g,n+β)/f(g,2n+α+β))/π
+    C = 2*p2*(f(n+α)*f(n+β)/f(2n+α+β))/π
     C2 = C*(α+β+2n)*(α+β+1+2n)/(4*(α+n)*(β+n))
 
     vals = C*S
@@ -367,39 +355,209 @@ function feval_asy1(n::Integer, α::Float64, β::Float64, t::AbstractVector, idx
     return vals, ders
 end
 
-function boundary(n::Integer, α::Float64, β::Float64, npts::Integer)
-# Algorithm for computing nodes and weights near the boundary.
+function asy2(n::Integer, α::Float64, β::Float64, npts::Integer)
+    # Algorithm for computing nodes and weights near the boundary.
 
     # Use Newton iterations to find the first few Bessel roots:
-    smallK = min(30, npts)
-    jk = approx_besselroots(α, smallK)
+    jk = approx_besselroots(α, npts)
 
-    # Approximate roots via asymptotic formula: (see Olver 1974)
+    # Approximate roots via asymptotic formula: (see Olver 1974, NIST, 18.16.8)
     phik = jk/(n + .5*(α + β + 1))
-    x = cos.( phik .+ ((α^2-0.25).*(1 .-phik.*cot.(phik))./(8*phik) .- 0.25.*(α^2-β^2).*tan.(0.5.*phik))./(n + 0.5*(α + β + 1))^2 )
+    t = phik .+ ((α^2-0.25).*(1 .- phik.*cot.(phik))./(2*phik) .- 0.25.*(α^2-β^2).*tan.(0.5.*phik))./(n + .5*(α + β + 1))^2
+
+    # Compute higher terms:
+    higherterms = asy2_higherterms(α, β, t)
 
     # Newton iteration:
     for _ in 1:10
-        vals, ders = innerjacobi_rec(n, x, α, β)  # Evaluate via asymptotic formula.
-        dx = -vals./ders  # Newton update.
-        x += dx  # Next iterate.
-        if norm(dx,Inf) < sqrt(eps(Float64))/200
+        vals, ders = feval_asy2(n, α, β, t, higherterms)  # Evaluate via asymptotic formula.
+        dt = vals./ders  # Newton update.
+        t += dt  # Next iterate.
+        if norm(dt,Inf) < sqrt(eps(Float64))/200
             break
         end
     end
-    vals, ders = innerjacobi_rec(n, x, α, β)  # Evaluate via asymptotic formula.
-    dx = -vals./ders
-    x += dx
+    vals, ders = feval_asy2(n, α, β, t, higherterms)  # Evaluate via asymptotic formula.
+    dt = vals./ders
+    t += dt
 
     # flip:
-    x = reverse(x)
+    t    = reverse(t)
     ders = reverse(ders)
 
     # Revert to x-space:
-    w = 1 ./ ((1 .- x.^2) .* ders.^2)
+    x = cos.(t)
+    w = 1 ./ ders.^2
+
     return x, w
 end
 
+"""
+Evaluate the boundary asymptotic formula at x = cos(t).
+Assumption:
+* `length(t) == n ÷ 2`
+"""
+function feval_asy2(n::Integer, α::Float64, β::Float64, t::AbstractVector, higherterms::HT) where HT <: Tuple{<:Function, <:Function, <:Function, <:Function}
+    rho  = n + .5*(α + β + 1) 
+    rho2 = n + .5*(α + β - 1)
+    A = (.25 - α^2)       
+    B = (.25 - β^2)
+    
+    # Evaluate the Bessel functions:
+    Ja = besselj.(α, rho*t)
+    Jb = besselj.(α + 1, rho*t)
+    Jbb = besselj.(α + 1, rho2*t)
+    Jab = bessel_taylor(-t, rho*t, α)
+
+    # Evaluate functions for recursive definition of coefficients:
+    gt = A*(cot.(t/2) .- (2 ./ t)) .- B*tan.(t/2)
+    gtdx = A*(2 ./ t.^2 .- .5*csc.(t/2).^2) .- .5*B*sec.(t/2).^2
+    tB0 = .25*gt
+    A10 = α*(A+3*B)/24
+    A1  = gtdx/8 .- (1+2*α)/8*gt./t .- gt.^2/32 .- A10
+    # Higher terms:
+    tB1, A2, tB2, A3 = higherterms
+    tB1t = tB1(t) 
+    A2t  = A2(t) 
+
+    # VALS:
+    vals  = Ja + Jb.*tB0/rho + Ja.*A1/rho^2 + Jb.*tB1t/rho^3 + Ja.*A2t/rho^4
+    # DERS:
+    vals2 = Jab + Jbb.*tB0/rho2 + Jab.*A1/rho2^2 + Jbb.*tB1t/rho2^3 + Jab.*A2t/rho2^4
+    
+    # Higher terms (not needed for n > 1000).
+    tB2t    = tB2(t) 
+    A3t     = A3(t)
+    vals  .+= Jb.*tB2t/rho^5 + Ja.*A3t/rho^6
+    vals2 .+= Jbb.*tB2t/rho2^5 + Jab.*A3t/rho2^6
+
+    # Constant out the front (Computed accurately!)
+    ds = .5*(α^2)/n
+    s = ds 
+    jj = 1
+    while abs(ds/s) > eps(Float64)/10
+        jj = jj+1
+        ds = -(jj-1)/(jj+1)/n*(ds*α)
+        s = s + ds
+    end
+    p2 = exp(s)*sqrt((n+α)/n)*(n/rho)^α
+    g = [1, 1/12, 1/288, -139/51840, -571/2488320, 163879/209018880,
+         5246819/75246796800, -534703531/902961561600,
+         -4483131259/86684309913600, 432261921612371/514904800886784000]
+    f(z) = dot(g, [1;cumprod(ones(9)./z)])
+    C = p2*(f(n+α)/f(n))/sqrt(2)
+
+    # Scaling:
+    valstmp = C*vals
+    denom   = sin.(t/2).^(α+.5).*cos.(t/2).^(β+.5)
+    vals    = sqrt.(t).*valstmp./denom
+
+    # Relation for derivative:
+    C2     = C*n/(n+α)*(rho/rho2)^α
+    ders   = (n*(α-β .- (2n+α+β)*cos.(t)).*valstmp .+ 2*(n+α)*(n+β)*C2*vals2)/(2n+α+β)
+    ders .*= sqrt.(t)./(denom.*sin.(t))
+
+    return vals, ders
+end
+
+function asy2_higherterms(α::Float64, β::Float64, theta::AbstractVector)
+    # Higher-order terms for boundary asymptotic series.
+    # Compute the higher order terms in asy2 boundary formula. See [2]. 
+    
+    # These constants are more useful than α and β:
+    A = (0.25 - α^2)
+    B = (0.25 - β^2)
+    
+    # For now, just work on half of the domain:
+    c = max(maximum(theta), .5)
+    
+    # Use 10 Chebyshev modes in order to use precomputed integration and 
+    # differentiation matrices
+    N = 10
+
+    # Scaled 2nd-kind Chebyshev points and barycentric weights:
+    t = .5*c*(sin.(pi*(-(N-1):2:(N-1))/(2*(N-1))) .+ 1)
+    v = [.5; ones(N-1)]
+    v[2:2:end] .= -1
+    v[end]     *= .5
+    
+    # The g's:
+    g   = A*(cot.(t/2) - 2 ./t) - B*tan.(t/2)
+    gp  = A*(2 ./ t.^2 - .5*csc.(t/2).^2) - .5*(.25-β^2)*sec.(t/2).^2
+    gpp = A*(-4 ./ t.^3 + .25*sin.(t).*csc.(t/2).^4) - 4*B*sin.(t/2).^4 .* csc.(t).^3
+    g[1]   = 0 
+    gp[1]  = -A/6-.5*B 
+    gpp[1] = 0
+    
+    # B0:
+    B0     = .25*g./t
+    B0p    = .25*(gp./t - g./t.^2)
+    B0[1]  = .25*(-A/6-.5*B)
+    B0p[1] = 0
+    
+    # A1:
+    A10   = α*(A+3*B)/24
+    A1    = .125*gp .- (1+2*α)/2*B0 .- g.^2/32 .- A10
+    A1p   = .125*gpp .- (1+2*α)/2*B0p .- gp.*g/16
+    A1p_t = A1p./t
+    A1p_t[1] = -A/720 - A^2/576 - A*B/96 - B^2/64 - B/48 + α*(A/720 + B/48)
+    
+    # Make f accurately: (Taylor series approx for small t)
+    fcos = B./(2*cos.(t/2)).^2
+    f = -A*(1/12 .+ t.^2/240+t.^4/6048 + t.^6/172800 + t.^8/5322240 +
+        691*t.^10/118879488000 + t.^12/5748019200 +
+        3617*t.^14/711374856192000 + 43867*t.^16/300534953951232000)
+    idx = t .> .5
+    f[idx] = A.*(1 ./ t[idx].^2 - 1 ./ (2*sin.(t[idx]/2)).^2)
+    f .-= fcos
+    
+    # Integrals for B1: (Note that N isn't large, so we don't need to be fancy).
+    C = (.5*c)*CUMSUMMAT_10
+    D = (2/c)*DIFFMAT_10
+    I = (C*A1p_t)
+    J = (C*(f.*A1))
+    
+    # B1:
+    tB1    = -.5*A1p - (.5+α)*I + .5*J
+    tB1[1] = 0
+    B1     = tB1./t
+    B1[1]  = A/720 + A^2/576 + A*B/96 + B^2/64 + B/48 +
+        α*(A^2/576 + B^2/64 + A*B/96) - α^2*(A/720 + B/48)
+    
+    # A2:
+    K  = C*(f.*tB1)
+    A2 = .5*(D*tB1) - (.5+α)*B1 - .5*K
+    A2 .-= A2[1]
+    
+    # A2p:
+    A2p = D*A2
+    A2p .-= A2p[1]
+    A2p_t = A2p./t
+    # Extrapolate point at t = 0:
+    w = pi/2 .- t[2:end]
+    w[2:2:end] = -w[2:2:end]
+    w[end]     = .5*w[end]
+    A2p_t[1] = sum(w.*A2p_t[2:end])/sum(w)
+    
+    # B2:
+    tB2 = -.5*A2p - (.5+α)*(C*A2p_t) + .5*C*(f.*A2)
+    B2  = tB2./t
+    # Extrapolate point at t = 0:
+    B2[1] = sum(w.*B2[2:end])/sum(w)
+    
+    # A3:
+    K  = C*(f.*tB2)
+    A3 = .5*(D*tB2) - (.5+α)*B2 - .5*K
+    A3 .-= A3[1]
+    
+    tB1f(theta) = bary(theta, tB1, t, v)
+    A2f(theta)  = bary(theta, A2, t, v)
+    tB2f(theta) = bary(theta, tB2, t, v)
+    A3f(theta)  = bary(theta, A3, t, v)
+
+    return tB1f, A2f, tB2f, A3f
+end
+
 function jacobi_jacobimatrix(n, α, β)
     s = α + β
     ii = 2:n-1
@@ -426,3 +584,71 @@ function jacobi_gw(n::Integer, α, β)
     w = V[1,:].^2 .* jacobimoment(α, β)
     return x, w
 end
+
+function bary(x::AbstractVector, fvals::AbstractVector, xk::AbstractVector, vk::AbstractVector)
+    # simple barycentric interpolation routine adapted from chebfun/bary.m
+
+    # Initialise return value:
+    fx = zeros(length(x))
+
+    # Loop:
+    for j in eachindex(x)
+        xx = vk ./ (x[j] .- xk)
+        fx[j] = dot(xx, fvals) / sum(xx)
+    end
+
+    # Try to clean up NaNs:
+    for k in findall(isnan.(fx))
+        index = findfirst(x[k] .== xk) # Find the corresponding node
+        if !isempty(index)
+            fx[k] = fvals[index]       # Set entries to the correct value
+        end
+    end
+
+    return fx
+end
+
+function bessel_taylor(t::AbstractVector, z::AbstractVector, α::Float64)
+    # Accurate evaluation of Bessel function J_A for asy2. (See [2].)
+    # evaluates J_A(Z+T) by a Taylor series expansion about Z. 
+    
+    npts = length(t)
+    kmax = min(ceil(Int64, abs(log(eps(Float64))/log(norm(t, Inf)))), 30)
+    H = t' .^ (0:kmax)
+    # Compute coeffs in Taylor expansions about z (See NIST 10.6.7)
+    nu = ones(Int64, length(z)) * (-kmax:kmax)'
+    JK = z * ones(2kmax+1)'
+    Bjk = besselj.(α .+ nu, JK)
+    nck = nck_mat(floor(Int64, 1.25*kmax)) # nchoosek
+    AA = [Bjk[:,kmax+1] zeros(npts, kmax)]
+    fact = 1
+    for k = 1:kmax
+        sgn = 1
+        for l = 0:k
+            AA[:,k+1] = AA[:,k+1] + sgn*nck[k,l+1]*Bjk[:,kmax+2*l-k+1]
+            sgn = -sgn
+        end
+        fact = k*fact
+        AA[:,k+1] ./= 2^k * fact
+    end
+    # Evaluate Taylor series:
+    Ja = zeros(npts)
+    for k = 1:npts
+        Ja[k] = dot(AA[k,:], H[:,k])
+    end
+
+    return Ja
+end
+
+function nck_mat(n::Integer)
+    # almost triangular matrix storing n choose k
+    M = zeros(Int64, n-1, n)
+    M[:,1] .= 1
+    M[1,2]  = 1
+    for i=2:n-1
+        for j=2:i+1
+            M[i,j] = M[i-1,j-1] + M[i-1,j]
+        end
+    end
+    return M
+end
diff --git a/test/test_gaussjacobi.jl b/test/test_gaussjacobi.jl