diff --git a/src/LinSolvers.jl b/src/LinSolvers.jl index dad76bc95..968150573 100644 --- a/src/LinSolvers.jl +++ b/src/LinSolvers.jl @@ -110,6 +110,11 @@ See [`LinSolver`](@ref) and [`FactorizeLinSolverCreator`](@ref) for examples. function FactorizeLinSolver(nep::NEP, λ, umfpack_refinements) A=compute_Mder(nep,λ) Afact=factorize(A) + # Only activate umfpack_refiments on julia >= 1.9 cf #265 + if (Afact isa SparseArrays.UMFPACK.UmfpackLU & + isdefined(Afact,:control)) + Afact.control[8]=umfpack_refinements # Set the maximum number of refiments + end return FactorizeLinSolver(Afact, umfpack_refinements) end @@ -125,19 +130,7 @@ This function must be overloaded if a user wants to define their own way of solving linear systems. See [`LinSolver`](@ref) for examples. """ function lin_solve(solver::FactorizeLinSolver, x::Array; tol = 0) - with_umfpack_refinements(solver.umfpack_refinements) do - solver.Afact \ x - end - end - - function with_umfpack_refinements(f::Function, refinements) - current_refinements = SuiteSparse.UMFPACK.umf_ctrl[8] - try - SuiteSparse.UMFPACK.umf_ctrl[8] = refinements - f() - finally - SuiteSparse.UMFPACK.umf_ctrl[8] = current_refinements - end + solver.Afact \ x end diff --git a/src/NEPSolver.jl b/src/NEPSolver.jl index 1cdd2d6d0..81916a679 100644 --- a/src/NEPSolver.jl +++ b/src/NEPSolver.jl @@ -32,6 +32,7 @@ module NEPSolver include("method_broyden.jl") include("method_ilan.jl") include("method_nleigs.jl") + include("method_nleigs_toar.jl") diff --git a/src/method_nleigs_toar.jl b/src/method_nleigs_toar.jl new file mode 100644 index 000000000..70ca1bdb3 --- /dev/null +++ b/src/method_nleigs_toar.jl @@ -0,0 +1,416 @@ +export nleigs_toar + +include("nleigs_toar_creator.jl") +include("nleigs_toar_aux.jl"); +include("nleigs_toar_tensor_compress.jl"); + +function evalnrt(nep::NLEIGS_NEP,k,sigma,b::AbstractVector) +# Evaluate the NLEIGS_NEP. Store evaluation in pre-allocated vector b + beta=nep.beta; + s=nep.s; + xi=nep.xi; + b[1]=1/beta[1]; + for i=1:k + b[i+1]=((sigma-s[i])*b[i])/(beta[i+1]*(1.0-sigma/xi[i])); + end + +end + + + + +# Sorts first by region and then by closeness to target. +# The evps in the region closest to the target will appear first. +function region_target_sort(target,rg,evps) + n1=size(evps,1) + SS= [map(i->!rg_check_inside(rg,evps[i]),1:n1) abs.(evps .- target) ] + + # Sort first by inregion and then by target distance + II=sortperm([real.(SS[i,:]) for i=1:n1],lt=(a,b)-> + begin + if (a[1] == b[1]) + return a[2]create_linsolver(linsolvercreator,nep,λ), nep.shifts); + return lsolvers +end + +# Restart using Schur form. Note: The matrices H and K are modified. +function nleigs_toar_restart(nep,logger,H,K,nv,nconv,target,rg,errmeasure,iter) + # Only the (nv+1,nv) leading submatrix of the H and K matrices relevant + betah=abs(H[nv+1,nv]); + if (size(nep.shifts,1) > 0) + betak = K[nv+1,nv] + end + + + + @status_nleigs_toar5() + + + + push_info!(logger,2,"Solving the hessenberg GEP "); + # Solve the GEP: + F=schur(K[1:nv,1:nv],H[1:nv,1:nv]); + z=nconv; + # Pick the locked eigenvalues + locked_evps=diag(H[1:z,1:z]) .\ diag(K[1:z,1:z]) + + Fvalues=F.values; + + map(s->begin + Fvalues[argmin(abs.(Fvalues .- locked_evps[s]))]=Inf + end, + 1:z) + truevals=map(t->!isinf(Fvalues[t]), 1:size(Fvalues,1)) + + evps_ref1=region_target_sort(target, rg, + Fvalues[truevals]) + evps_ref=[locked_evps;evps_ref1]; + + + + @status_nleigs_toar6() + # Reorder according to evps_ref + for k=1:nv + t=falses(nv); + t[1:(k-1)].=true; + i=argmin(abs.(F.values .- evps_ref[k])); + t[i]=true; + ordschur!(F,t) + end + + λv=F.values; + + Z1=copy(F.Q) + Q1=copy(F.Z) + T1=copy(F.T); + S1=copy(F.S); + + D=I + @status_nleigs_toar7() + + + Z1=Z1*D; + T1=D'*T1*D; + S1=D'*S1*D; + Q1=Q1*D + + set_errmeasure_info(errmeasure,:Z,Z1); + set_errmeasure_info(errmeasure,:T,T1); + set_errmeasure_info(errmeasure,:S,S1); + set_errmeasure_info(errmeasure,:Q,Q1); + set_errmeasure_info(errmeasure,:H,copy(H[1:nv,1:nv])); + set_errmeasure_info(errmeasure,:K,copy(K[1:nv,1:nv])); + + T=T1; + S=S1; + Q=Q1; + + + @status_nleigs_toar8() + + + # To imitate the inplace in slepc + H[1:nv,1:nv]=T[1:nv,1:nv]; + K[1:nv,1:nv]=S[1:nv,1:nv]; + + @status_nleigs_toar9() + + + + + return (betah,betak,λv,Q,Z1); + + +end + +# keep_factor = ctx->keep: NEPNLEIGSSetRestart +# ncv = slepc_nep.ncv: the largest dimension of the working subspace: dimension of the subspace (default: 2*nev) +# nconv = slepc_nep.nconv: number of converged eigenvalues +# mpd = maximum projected dimension +# +# The value of ncv should always be between nev and (nev+mpd), +# +# Vtensor = slepc_nep.data.V: a basis matrix of the structure (I kron U)*S +# V0 = slepc_nep.V: +# +function nleigs_toar(nep,rg; + errmeasure=CheapKrylovErrmeasure(), + Vtensor=NaN, + maxit=10,tol=1e-13,keep_factor=0.5, + ncv=20,neigs=5,idxrk=0,mpd=ncv,nshiftsw=NaN, + lock=1, + linsolvercreator=DefaultLinSolverCreator(), + target=0.0, + logger=0) + + + @parse_logger_param!(logger) + + + ## TODO: Extract eigenvectors + ## TODO: Better domain discretization + ## TODO: Linear dependence check improve + ## TODO: Move RQ-restart to separate function + ## TODO: Improve breakdown in toar_extendbasis, run + + + @status_nleigs_toar0() + + + deg=nep.nmat-1; + evps=[]; + + + lsolvers=get_linsolvers(nep,linsolvercreator); + + + if (Vtensor isa Number) + Vtensor=init_basis(nep,deg,neigs,ncv) + @status_nleigs_toar1() + end + + + + + V0=Vtensor.U; + nconv=0; + + if (isnan(nshiftsw)) + nshiftsw=size(nep.shifts,1); + end + + #ld=size(V0.mat,2); + #lds=deg*ld; + l=0; + iter=0; + breakdown=0; + innerit=-1; + + + + nev=neigs; + ldds=ncv+1; # Leading dimension in ds + push_info!(logger,2,"ldds=$ldds"); + push_info!(logger,2,"nev=$nev"); + #set_active_columns(Vtensor.U,0,ldds); + + Av=get_Av(nep); + + # Initiate W from V0 + W=copy(V0); + W.mat=W.mat[:,1:max(size(Av,1)-1,nep.nmat-1)]; # Resize + #set_active_columns(W,min(size(W.active_mat,2),size(W.mat,2))); # reset active cols + set_active_columns(W,size(W.mat,2)); # reset active cols + W.mat .= 0 # For comparison + + + @status_nleigs_toar2() + + + H=zeros(ComplexF64,ldds,ldds); + K=zeros(ComplexF64,ldds,ldds); + + + locked_evps=[]; + reason_symb = :CONVERGED_ITERATING + + + + VV=[]; + k=0; + + while (reason_symb == :CONVERGED_ITERATING) + iter=iter+1; + push_info!(logger,1,"Outer iteration:$iter"); + + @status_nleigs_toar3() + + + + nv=min(nconv+ mpd,ncv); + push_info!(logger,2,"nv=$nv"); + push_info!(logger,2,"Calling nleigs_toar_run"); + (nv,breakdown,idxrk)= + nleigs_toar_run(nep,V0,Vtensor, + idxrk, # also return value + nshiftsw, + view(K,1:ldds,1:(ldds-1)), + view(H,1:ldds,1:(ldds-1)), + ldds,W,nconv+l, + nv, # also return value + breakdown, # also return value + lsolvers, + iter, + logger); + + + @status_nleigs_toar4() + + + + (betah,betak,λv,Q,Z1)=nleigs_toar_restart(nep,logger,H,K,nv,nconv,target,rg,errmeasure,iter) + + + + # Check convergence and get k and eigvals + (k,evps)=compute_convergence(nep,errmeasure,nconv,nv-nconv,betah,betak,k,λv,rg,tol,iter,logger) + + @status_nleigs_toar10() + + reason_symb=basic_stopping(iter,maxit,k,nev); + + # compute l + if ((reason_symb != :CONVERGED_ITERATING) || (breakdown > 0)) + l=0; + push_info!(logger,"reason stopped: CONVERGED ITERATING"); + else + l=Int(max(1,floor((nv-k)*keep_factor))); + if (breakdown == 0) # ? + + # RQ restart + if (size(nep.shifts,1)==0) + + + error("Not implemented"); + l= (k+l)-k; + #newn=? + else + + + i0=0+(lock>0); + + # Note that H and K matrices are modified by the ds-solver in SLEPc + + + + + # Set the row + for i=i0:(k+l-1) + H[k+l+1,i+1]=betah*Q[nv,i+1]; + K[k+l+1,i+1]=betak*Q[nv,i+1]; + end + + @status_nleigs_toar11() + + + end + + end + + end + + + if ((lock==0) && reason_symb == :CONVERGED_ITERATING && breakdown==0) + l=l+k; k=0; + end + + MQ = Z1 # Slepc notation + Vtensor.S[:,1 .+ (nconv:(k+l-1))]= + Vtensor.S[:,1:size(MQ,1)]*MQ[:,1 .+ (nconv:(k+l-1))]; + + @status_nleigs_toar12() + + + # Copy last column of S + Vtensor.S[:,l+k+1]=Vtensor.S[:,nv+1]; + + if (breakdown >0) + println("Breakdown!"); + return; + end + + + # If not converged iteration: + if (reason_symb != :CONVERGED_ITERATING) + l=l-1; + end + + + + + nq=size(V0.active_mat, 2); + + if (k+l+deg <= nq) + + set_active_columns(Vtensor,nconv,k+l+1); + # Compress tensor Vtensor + push_info!(logger,2,"Compress: Running compress (k=$k)"); + if (lock!=0) + tensor_compress(Vtensor,k-nconv,logger,iter=iter); + else + tensor_compress(Vtensor,0,logger,iter=iter); + end + + + @status_nleigs_toar13() + + + end + + nconv=k; + @status_nleigs_toar14() + end + + + # Pick out the eigenvectors + kk=size(Vtensor.U.active_mat,2) + pp=Int(ceil(size(Vtensor.U.mat,1)*Vtensor.d/size(nep,1))); + II2=Matrix{Float64}(I,pp,pp); + VV=(kron(II2,Vtensor.U.active_mat)*Vtensor.S[1:(pp*kk),1:nconv])[1:size(nep,1),:] + + push_info!(logger,2,"nconv=$nconv"); + + return (evps,VV); +end + + +## Get a starting basis (random) in tensor form +function init_basis(nep,deg,neigs,ncv) + m=ncv+nep.nmat-1 + tensor_U=Basis_standard(zeros(ComplexF64,size(nep,1),m),0,0); + + # From SLEPC documentation:The dimensions of S are d times m rows and m-d+1 columns, where m is the number of columns of U, so m should be at least d. + pp=m-deg+1 + + tensor_l=0; + tensor_d=deg; + tensor_m=pp + tensor_nqt=pp + + + set_active_columns(tensor_U,deg); + # Random initialization of the basis. Two levels of orthogonality must be imposed + RR=randn(size(tensor_U.mat,1),deg); normalize!(vec(RR)); + F=qr(RR); + tensor_U.mat[:,1:deg]=Matrix(F.Q); + tensor_S=zeros(ComplexF64,deg*m,pp); + S1=[F.R[1:deg,1:deg];zeros(m-deg,deg)]; s1=vec(S1); + tensor_S[:,1]=s1/norm(vec(tensor_U.mat*S1)) # Same but faster + Vtensor=Basis_tensor(tensor_U,tensor_S,tensor_m, + tensor_l,tensor_nqt,tensor_d); + return Vtensor +end + +## A basic stopping criteria function. Returns a symbol indicating +## if the iteration has converged +function basic_stopping(its,maxit,nconv,nev) + if (its>=maxit) + return :CONVERGED_ITS; # required more than max_it iterations to reach convergence + end + if (nconv>=nev) + return :CONVERDED_TOL; + end + return :CONVERGED_ITERATING # Inner iteration okay +end diff --git a/src/nleigs_toar_aux.jl b/src/nleigs_toar_aux.jl new file mode 100644 index 000000000..9311adfa6 --- /dev/null +++ b/src/nleigs_toar_aux.jl @@ -0,0 +1,560 @@ +## Auxiliary routines for nleigs toar + +## Basis operations +import Base.copy; +import NonlinearEigenproblems.Errmeasure +import NonlinearEigenproblems.estimate_error; + +mutable struct Basis_standard{M1<:AbstractMatrix} + mat::M1 + active_mat::SubArray + # n=size(mat,1); + # m=size(mat,2); + # l=not available + l::Int # locked + # nqt=k= size(active_mat,2) # active columns +end + +function set_active_columns(B::Basis_standard,k1) + B.active_mat=view(B.mat,1:size(B.mat,1),1:k1); +end +function set_active_columns(B::Basis_standard,l1,k1) + B.active_mat=view(B.mat,1:size(B.mat,1),1:k1); + B.l=l1; +end +function copy(B::Basis_standard) + B2mat=copy(B.mat); + B2=Basis_standard(B2mat,view(B2mat,:,1:size(B.active_mat,2)),B.l); +end +function Basis_standard(mat::AbstractMatrix,l::Number,k::Number) + active_mat=view(mat,1:size(mat,1),1:k); + return Basis_standard(mat,active_mat,l); +end +function orthogonalize(B::Basis_standard,j) + # Gram-Schmidt orthogonalization: + H=B.mat[:,1:(j-1)]'*B.mat[:,j] + y=B.mat[:,j]-B.mat[:,1:(j-1)]*H; + beta=norm(y); + B.mat[:,j]=y; + + # Orthogonalize twice + G=B.mat[:,1:(j-1)]'*B.mat[:,j] + y=B.mat[:,j]-B.mat[:,1:(j-1)]*G; + + beta=norm(y); + B.mat[:,j]=y; + + H=H+G; + return (H,beta); +end + +# Represents (I otimes U) S . U is represented as a Basis_standard +mutable struct Basis_tensor{M<:AbstractMatrix} + U::Basis_standard{M} + S::M + m::Int # size + l::Int # locked columns + nqt::Int # active cols (aka k) + d::Int # degree +end +#function Basis_tensor() +# return Basis_tensor([],[],0,0,0,0); +#end + + +function copy(B::Basis_tensor) + U1=copy(B.U); + S1=copy(B.S); + return Basis_tensor{typeof(S1)}(U1,S1,B.m,B.l,B.nqt,B.d); +end + + +function set_active_columns(B::Basis_tensor,k1) + B.nqt=k1; +end +function set_active_columns(B::Basis_tensor,l1,k1) + B.l=l1; + B.nqt=k1; +end + +function orthogonalize(B::Basis_tensor,j) + # The tensor is an implicit representation of V = (I otimes U) S, + # Just carry out the operations in the factorized form. + S=B.S; + U=B.U; + + # Derivation of h-formula: + # Q=kron(I,U)*S[:,1:j-1]; + # z=kron(I,U)*S[:,j]; + # h=Q[:,1:j-1]*Q[:,j]; + # h=S[:,1:j-1]'*kron(I,U')*kron(I,U)*S[:,j]; + h=S[:,1:j-1]'*S[:,j]; + # Derivation of S-formula: + # Q[:,j]=Q[:,j]-Q[:,1:j-1]*h; + S[:,j]=S[:,j]-S[:,1:j-1]*h; + beta=norm(S[:,j]); + + return (h,beta); +end + +include("nleigs_toar_debugging.jl"); + + + +## Error measure +mutable struct CheapKrylovErrmeasure <: Errmeasure + Z + Q + T + S + H + K +end + +function CheapKrylovErrmeasure() + return CheapKrylovErrmeasure([],[],[],[],[],[]); +end + +function estimate_error(errmeasure::CheapKrylovErrmeasure,λ,v) + # Compute the Ritz vectors + nn=minimum(size(errmeasure.K)); + EE=eigen(errmeasure.K[1:nn,1:nn],errmeasure.H[1:nn,1:nn]); + ee=EE.values; + ee[isnan.(ee)] .= Inf; + i=argmin(abs.(EE.values .- λ)); + if (abs(EE.values[i] - λ)>1e-10) + @show λ + @show EE.values[i] + @show "Warning did not find the eigenvalue" + end + # Compute the error from the eigenvector + return abs(EE.vectors[nn,i]); +end + +function set_errmeasure_info(errmeasure::CheapKrylovErrmeasure,key::Symbol,val) + # This will generate strange errors if it is not a member. + setfield!(errmeasure,key,val); +end + +function set_errmeasure_info(errmeasure::Errmeasure,key::Symbol,val) + # Do nothing by default +end +errmeasure_use_true_vector(errmeasure::Errmeasure)=true; +errmeasure_use_true_vector(errmeasure::CheapKrylovErrmeasure)=false; +function rg_check_inside(rg,λ) + # Rectangle + return (real(λ)>rg[1] && real(λ)rg[3] && imag(λ)h) + @warn "The rectangle discretization is not complete"; + end + + dsi=NaN*ds; + return (ds,dsi) +end + + + +function lejabagby_toar(nep::NEP,rg,ddmaxit,singularity_computation) + + @status_lejabagby0() + ndptx=10000 # Default values + ndpt=10000; + nrs=zeros(ComplexF64,ndpt) + nrx=zeros(ComplexF64,ndpt) + s=zeros(ComplexF64,ddmaxit) + xi=zeros(ComplexF64,ddmaxit); + beta=zeros(ComplexF64,ddmaxit) + nrxi=zeros(ComplexF64,ndpt); + nrs=zeros(ComplexF64,ndpt); + + (ds,dsi)=discretize_region(rg,ndpt) + @status_lejabagby1() + + + # Compute the singularities. If not user specified, assume no singularities + (dxi,new_ndptx)=(Inf*ones(ndptx),0) + if (!isnothing(singularity_computation)) + (dxi,new_ndptx)=singularity_computation(nep,ndptx); + end + ndptx=new_ndptx + + @status_lejabagby2() + + + s[1] = ds[1]; + if (size(dxi,1)>0) + xi[1] = dxi[1] + else + xi[1]=Inf; + end + + beta[1]=1.0 + nrs.=1 + nrxi.=1 + + for k=1:ddmaxit-1 + + @status_lejabagby3() + + maxnrs=0; + minnrxi=typemax(Float64); + for i=0:(ndpt-1) + nrs[i+1] *= ((ds[i+1]-s[k-1+1])/(1.0-ds[i+1]/xi[k-1+1]))/beta[k-1+1]; + if (abs(nrs[i+1])>maxnrs) + maxnrs = abs(nrs[i+1]); + s[k+1] = ds[i+1]; + if (k==4) + #@show k,i,s[k+1] + end + end + end + + if (ndptx>k) + for i=1:(ndptx-1) + nrxi[i+1] *= ((dxi[i+1]-s[k-1+1])/(1.0-dxi[i+1]/xi[k-1+1]))/beta[k-1+1]; + if (abs(nrxi[i+1])0) + for i=1:(j+1) + K[i,j+1] = sigma*H[i,j+1] + tt[i]; + end + K[j+2,j+1]= sigma*H[j+2,j+1]; + end + + # Scale the column + Vtensor.S[:,j+2] *= 1/nrm; + set_active_columns(V0,l,nqt); + @status_run7() + + # Skipping breakdown check ... if (breakdown) + + end + + + return (m,breakdown,idxrk); # return new values for m,breakdown,idxrk + +end + + +function compute_convergence(nep::NEP,errmeasure::ErrmeasureType,kini,nits,betah,betak,k0,λv,rg,tol,iter,logger) + # w is a work vector and skipped in the params + marker=-1; + use_true_vector=errmeasure_use_true_vector(errmeasure); + + kout=0; + + for k=kini:(kini+nits-1) + # Check inside + inside=rg_check_inside(rg,λv[k+1]); + if marker==-1 && !inside + marker = k; + end + + + if (use_true_vector) + # Compute the eigenvector approx + + # TODO + error("Not yet implemented"); + e=estimate_error(errmeasure,λv[k+1],v); + else + e=estimate_error(errmeasure,λv[k+1],[]); + e *= abs((betak-real(λv[k+1])*betah)+1im*imag(λv[k+1])*betah); + end + + s=(λv[k+1],e) + push_info!(logger,1,"(λ,rnorm)=$s"); + if (marker==-1 && e >= tol) + marker = k; + end + if (marker != -1) + break; + end + + end + if (marker!=-1) + k = marker; + end + kout=k; + return (kout,λv[1:k]); +end diff --git a/src/nleigs_toar_creator.jl b/src/nleigs_toar_creator.jl new file mode 100644 index 000000000..9f1109ffb --- /dev/null +++ b/src/nleigs_toar_creator.jl @@ -0,0 +1,102 @@ +using NonlinearEigenproblems +import NonlinearEigenproblems.get_Av; +import NonlinearEigenproblems.compute_Mder; +import NonlinearEigenproblems.compute_Mlincomb; +import Base.size; +import NonlinearEigenproblems.AbstractSPMF +export NLEIGS_NEP + +mutable struct NLEIGS_NEP{T} <: AbstractSPMF{T} + org_nep::NEP + shifts + beta + s + xi + nmat + coeffD + ddmaxit +end + +function nleigs_nep_fv(nleigs_nep::NLEIGS_NEP,s) + + Av=get_Av(nleigs_nep.org_nep); + nt=size(Av,1); + fv=zeros(ComplexF64,nt); + + coeffs=ones(ComplexF64,nleigs_nep.nmat)*NaN; + evalnrt(nleigs_nep,nleigs_nep.nmat-1,s,coeffs); + + for k=0:(nt-1) + alpha = 0.0; + for j=0:nleigs_nep.nmat-1 + alpha += coeffs[j+1]*nleigs_nep.coeffD[k+1,j+1]; + end + fv[k+1]=alpha; + end + + return fv + + +end + +get_Av(nep::NLEIGS_NEP)=get_Av(nep.org_nep); +size(nep::NLEIGS_NEP)=size(nep.org_nep); +size(nep::NLEIGS_NEP,k)=size(nep.org_nep,k); + +function NLEIGS_NEP(orgnep,rg;ddmaxit=100,shifts=[1.1+1e-5*1im],nmat=NaN, + singularity_computation=nothing) + + (beta,xi,s)=lejabagby_toar(orgnep,rg,ddmaxit,singularity_computation); + ddtol=1e-9 + (coeffD,cnmat)=divided_differences(orgnep,ddmaxit,beta,xi,s,ddtol) + if (isnan(nmat)) + nmat=cnmat; + end + return NLEIGS_NEP{ComplexF64}(orgnep,shifts,beta,s,xi,nmat,coeffD,ddmaxit) +end +compute_Mder(nleigs_nep::NLEIGS_NEP,s::Number)=compute_Mder(nleigs_nep,s,0) + +function compute_Mder(nleigs_nep::NLEIGS_NEP,s::Number,der::Int) + + + Av=get_Av(nleigs_nep.org_nep); + A=zero(Av[1]); + fv=nleigs_nep_fv(nleigs_nep,s) + nt=size(fv,1); + for k=1:nt + A += Av[k]*fv[k]; + end + return A; +end +compute_Mlincomb(nep::NLEIGS_NEP{Complex{Float64}},λ::Number,V::AbstractVecOrMat)=compute_Mlincomb!(nep,λ,copy(V)) + + +function divided_differences(orgnep::NEP,ddmaxit,beta,xi,s,ddtol) + + + nmat=ddmaxit + nmax=ddmaxit; # Number of expansion coeffs (max iterations) + fv=get_fv(orgnep); + Av=get_Av(orgnep); + nt=size(fv,1); # Number of terms + coeffD=zeros(ComplexF64,nt,nmax); + matnorm=opnorm.(Av,1); + pK=Matrix{ComplexF64}(I,nmax,nmax); + pK=LowerTriangular(diagm(0=>ones(nmax),-1=> (beta[2:nmax] ./ xi[1:(nmax-1)]))) + pH=LowerTriangular(diagm(0=>s[1:nmax],-1=> beta[2:nmax])) + Z=pH/pK + for k=1:nt + coeffD[k,:]=beta[1]*fv[k](Z)[:,1]; + end + # Compute nmat + norm0=sum(matnorm .* abs.(coeffD[:,1])) + for k=2:ddmaxit-1 + nrm=sum(matnorm .* abs.(coeffD[:,k+1])) + if (nrm/norm0 < ddtol) + nmat=k+1 + break; + end + end + + return (coeffD,nmat) +end diff --git a/src/nleigs_toar_debugging.jl b/src/nleigs_toar_debugging.jl new file mode 100644 index 000000000..2ea2d59de --- /dev/null +++ b/src/nleigs_toar_debugging.jl @@ -0,0 +1,112 @@ + + + +macro status_rkcont1() +end + +macro status_rkcont2() +end +macro status_rkcont3() +end +macro status_extend1() +end +macro status_extend2() +end +macro status_extend3() +end +macro status_extend4() +end +macro status_run1() +end +macro status_run2() +end +macro status_run3() +end +macro status_run4() +end +macro status_run5() +end +macro status_run6() +end +macro status_run7() +end +macro status_nleigs_toar0() +end +macro status_nleigs_toar1() +end +macro status_nleigs_toar2() +end +macro status_nleigs_toar3() +end + + +macro status_nleigs_toar4() +end + + +macro status_nleigs_toar5() +end +macro status_nleigs_toar6() +end +macro status_nleigs_toar7() +end +macro status_nleigs_toar8() +end +macro status_nleigs_toar9() +end +macro status_nleigs_toar10() +end +macro status_nleigs_toar11() +end +macro status_nleigs_toar12() +end +macro status_nleigs_toar13() +end +macro status_nleigs_toar14() +end +macro status_compress0() +end +macro status_compress1() +end + +macro status_compress2() +end +macro status_compress3() +end +macro status_compress4() +end +macro status_compress5() +end + +macro status_compress6() +end +macro status_compress7() +end +macro status_compress8() +end +macro status_compress9() +end +macro status_compress10() +end +macro status_compress11() +end +macro status_compress12() +end +macro status_compress13() +end +macro status_compress14() +end + + +macro status_lejabagby0() +end +macro status_lejabagby1() +end +macro status_lejabagby2() +end +macro status_lejabagby3() +end + + +macro status_lejabagby4() +end diff --git a/src/nleigs_toar_tensor_compress.jl b/src/nleigs_toar_tensor_compress.jl new file mode 100644 index 000000000..7107789be --- /dev/null +++ b/src/nleigs_toar_tensor_compress.jl @@ -0,0 +1,341 @@ + +# This is a reimplementation of the bvtensor functionality in SLEPc: +#https://slepc.upv.es/slepc-master/src/sys/classes/bv/impls/tensor/bvtensor.c.html#BVTensorCompress_Tensor + + + +function tensor_compress(V::Basis_tensor,newc::Int,logger;iter=0) + + @status_compress0() + + cs1=V.nqt; # Nof active columns + rs1=size(V.U.active_mat,2) + rk=0 + + lds=size(V.S,1); + ld=Int(lds/V.d); # Not sure what ctx->ld means + + lds=ld*V.d; # Redundant + deg=V.d; + + lwa=6*ld*lds+2*cs1; + + push_info!(logger,3,"lds=$lds,rs1=$rs1,ld=$ld,lwa=$lwa,deg=$deg") + + n=min(rs1,deg*cs1) + lock=V.U.l; + nnc=cs1-lock-newc; + nrow = rs1-lock; + + SS_mat=zeros(ComplexF64,newc,deg*nnc); + SS2_mat=zeros(ComplexF64,newc,nnc); + pQ=zeros(ComplexF64,(rs1+lock+newc)*n); + + offu = lock*(rs1+1); + sg = zeros(ComplexF64,6*n); + #Z = view(work,nwu .+ (1:deg*cs1*n)); + + nnctdeg=nnc*deg; + + + # Z_mat will be submatrices of Z_mat + Z_mat=zeros(ComplexF64,n,deg*cs1); + + + newctdeg=newc*deg; + cs1tdeg=cs1*deg; + S_mat=V.S; + + + @status_compress1() + + + pQ_offu=view(pQ,offu .+ (1:rs1*min(nrow,newctdeg))) + pQ_offu_mat=reshape(pQ_offu,rs1,min(nrow,newctdeg)); + + + + # Not sure how big the M_mat should be in the end, but this should be enough + M0_mat=zeros(ComplexF64, nrow, Int(ceil(rs1*cs1*deg/nrow))) + M_mat=view(M0_mat,1:nrow,1:newctdeg); + + + + if (newc>0) + + push_info!(logger,3,"newc>0") + push_info!(logger,3,"size(S_mat,2)"*string(size(S_mat))) + + + # copy part of S-matrix to M0_mat: truncate columns associated with new converged eigenpairs + + for j=0:(deg-1) + ii=(lock+1):(lock+newc) + M0_mat[:,ii .+ (-lock+j*newc)]=S_mat[(j*ld+lock) .+ (1:nrow),ii] + end + + + + + @status_compress2() + + + + F=svd(M_mat); + M_mat[:] .= 0; + + sg[1:size(F.S,1)]=F.S; + + push_info!(logger,3,"F.S[1:5]="*string(F.S[1:min(5,end)])); + push_info!(logger,3,"size(F.U)="*string(size(F.U))) + push_info!(logger,3,"size(F.V')="*string(size(F.V'))*", n=$n, newctdeg=$newctdeg") + push_info!(logger,3,"min(nrow,newctdeg)="*string(min(nrow,newctdeg))) + Z_mat[1:min(nrow,newctdeg),1:newc*deg]=F.V'; + + + + pQ_offu_mat_active=view(pQ_offu_mat,1:nrow,:); + + push_info!(logger,3,"rs1=$rs1") + push_info!(logger,3,"nrow=$nrow,newctdeg=$newctdeg") + + pQ_offu_mat_active[:,:]=F.U; + + + @status_compress3() + + + push_info!(logger,3,"newc=$newc,nrow=$nrow") + + rk=min(newc,nrow); + rmul!(view(Z_mat,1:rk,1:newctdeg)',Diagonal(sg[1:rk])) + + + for i=0:deg-1 + for j=lock:(lock+newc-1) + # Store into Z + S_mat[i*ld+lock .+ (1:rk),j+1] = Z_mat[1:rk,newc*i+j-lock+1] + # Zero-out unneccesary parts of S: + S_mat[i*ld+lock+rk .+ (1:(ld-lock-rk)),j+1] .= 0; + + end + end + +# + push_info!(logger,3,"lds=$lds") + push_info!(logger,3,"deg=$deg") + + + + end # end newc + + + + + + @status_compress4() + + + # Orthogonalize against pQ: next M should get rank nnc+d-1 + push_info!(logger,3,"newc=$newc, size(SS_mat)="*string(size(SS_mat))) + for i=0:deg-1 + pQ_view=view(pQ_offu_mat,1:nrow,1:newc); + + + + S_mat_view=view(S_mat,i*ld+lock .+ (1:nrow), (lock+newc) .+ (1:nnc)); + QQ=S_mat_view # Same notation as SLEPc + if (newc>0) + SS_mat[1:newc ,i*nnc .+ (1:nnc)] = pQ_view'*S_mat_view + S_mat_view[:,:] -= pQ_view*SS_mat[: ,i*nnc .+ (1:nnc)]; + + end + + + + + @status_compress5() + + + # Repeat it + + + SS2_mat[:,:]= pQ_view'*S_mat_view; + + + S_mat_view[:,:] -= pQ_view*SS2_mat; + + + if (newc>0) # otw empty matrix + SS_mat[1:newc,i*nnc .+ (1:nnc)] += SS2_mat; + end +# + + + end +@status_compress7() + + for j=0:deg-1 + ii=(lock+newc+1):(cs1) + M0_mat[1:nrow, ii .+ (-lock-newc+j*nnc)]= + S_mat[j*ld+lock .+ (1:nrow),ii] + end + +@status_compress8() + + + push_info!(logger,3,"nrow=$nrow,nnctdeg=$nnctdeg") + push_info!(logger,3,"size(M_mat)="*string(size(M_mat))) + #push_info!(logger,3,"size(M)="*string(size(M))) + + + M_mat=view(M0_mat,1:nrow,1:nnctdeg) + F=svd(M_mat) + + msz=minimum(size(M_mat)); + pQ_offu_new=reshape(view(pQ,offu+newc*rs1 .+ (1:rs1*msz)),rs1,msz) + pQ_offu_new_active=view(pQ_offu_new,1:nrow,:); + pQ_offu_new_active[:]=F.U; + + Z_mat[1:msz,1:nnctdeg]=F.V'; + + sg[1:length(F.S)]=F.S; + + + @status_compress9() + + if (size(sg,1)>0) + tol=max(rs1,deg*cs1)*eps(Float64)*sg[1]; + else + tol=max(rs1,deg*cs1)*eps(Float64); + end + + rk =0; + for i=1:min(nrow,nnctdeg) + if (real(sg[i])>real(tol)) + rk += 1; + end + end + rk = min(nnc+deg-1,rk); + @status_compress10() + + rmul!(view(Z_mat,1:rk,1:nnctdeg)',Diagonal(sg[1:rk])) + + + + mm=deg*cs1 + + push_info!(logger,3,"size(Z_mat)="*string(size(Z_mat))) + push_info!(logger,3,"V.m="*string(V.m)) + push_info!(logger,3,"cs1=$cs1") + push_info!(logger,3,"lds=$lds") + + S_mat[:,cs1 .+ (1:(V.m-cs1))] .= 0; + k = ld-lock-newc-rk; + for i=0:deg-1; + jj=(lock+newc+1):(cs1) + S_mat[i*ld+lock+newc.+(1:rk),jj]= + Z_mat[1:rk,jj .+ (nnc*i-lock-newc)]; + S_mat[i*ld+lock+newc+rk .+ (1:k),jj] .= 0; + + end + + @status_compress11() + + + if newc>0 + for i=0:deg-1 + p = nnc*newc*i; + for j=(lock+newc):(cs1-1) + for k=1:newc + ## WARNING !!! Not carefully tested + S_mat[i*ld+lock+k,j+1] = vec(SS_mat)[p+1]; + p += 1; + end + end + end + end + + #@optional_slepc_assert(S_mat,"S_mat","/tmp/iter$iter/compress_stage9_large.jl"); + + rk=rk+newc; + + nnc=cs1-lock + + + pQ_offu=view(reshape(view(pQ,offu .+ (1:(rs1*rk))),rs1,rk),1:nrow,1:rk); + push_info!(logger,3,"size(pQ_offu)="*string(size(pQ_offu))) + + FF=qr(pQ_offu); + for i=0:deg-1 + + SQ1=reshape(view(vec(S_mat),(lock*lds+lock+i*ld) .+ (1:(lds*nnc))),lds,nnc); + SQ=view(SQ1,1:rk,1:nnc); + + SQ[:,:]=FF.R*SQ; + end + + + + @status_compress12() + + + Qmat=Matrix(FF.Q); + +#Qmat=copy(FF.Q); + push_info!(logger,3,"($nrow,size(Qmat,1))") + push_info!(logger,3,"($rk,size(Qmat,2))") + + + @status_compress13() + + + rk += lock; + + map(i-> pQ[i+i*rs1+1]=1, 0:(lock-1)); # Make diagonal one of locked part + + push_info!(logger,3,"rs1=$rs1") + push_info!(logger,3,"V.U.l="*string(V.U.l)) + push_info!(logger,3,"size(V.U.active_mat)"*string(size(V.U.active_mat))) + + set_active_columns(V.U,rs1); # V.U.k=rs1; + + + pQ_offu[:]=Qmat; + pQ_mat=reshape(view(pQ,1:(rs1*rk)),rs1,rk); + #Qmat2=[pQ_mat[:,1:lock] Qmat]; + + + push_info!(logger,3,"lock=$lock") + push_info!(logger,3,"rk=$rk") + push_info!(logger,3,"size(V.U.mat)=$size(V.U.mat)") + push_info!(logger,3,"size(V.U.active_mat)") + push_info!(logger,3,"size(Qmat)=$size(Qmat)") + + + @status_compress14() + + + + #push_info!(logger,3,"size(Qmat2[:,1 .+ (lock:(rk-1))])") + # The size of Qmat seems okay. But we want to access columns outside? + + VU_mat_view=view(V.U.mat,:,1 .+ (lock:(rk-1))); + #VU_mat_view[:,:]=V.U.mat*Qmat[:,1 .+ (lock:(rk-1))]; + #VU_mat_view[:,1:(rk-1-lock)]=V.U.active_mat*Qmat[:,1 .+ (lock:(rk-1))]; + push_info!(logger,3,"size(VU_mat_view)") + #push_info!(logger,3,"size(V.U.mat[:,1:size(Qmat2,1)])") + push_info!(logger,3,"size(pQ_mat[:,1 .+ (lock:(rk-1))])") + + VU_mat_view[:]=V.U.mat[:,1:size(pQ_mat,1)]*pQ_mat[:,1 .+ (lock:(rk-1))]; + + +### ctx->qB not used: skip + + # Arrange active columns / locked columns + V.U.l += newc; + set_active_columns(V.U,rk); + + +end +# diff --git a/test/nleigstoar.jl b/test/nleigstoar.jl new file mode 100644 index 000000000..dd702b6fe --- /dev/null +++ b/test/nleigstoar.jl @@ -0,0 +1,27 @@ +# Unit test for the Nonlinear Arnoldi method (in src/method_nlar.jl) +# The "Gun" problem form gun_native.jl + +using NonlinearEigenproblemsTest +using NonlinearEigenproblems +using Test +using LinearAlgebra +using Random + +import Base.exp +exp(A::LowerTriangular)=exp(Matrix(A)); # Only needed for the delay example. Matrix exponential + +@bench @testset "NLEIGS TOAR" begin + TOL = 1e-10; + A0=Diagonal([ 1.0; -18.0; -2.0; 0.0; 1.0; -8.0; 13.0; -14.0; -9.0; -9.0; 8.0; 6.0; 2.0; -9.0; -26.0; -1.0; 9.0; 1.0; -3.0; -14.0]) + A1=Matrix(Diagonal([ 10.0; -6.0; -2.0; -4.0; -8.0; -2.0; -4.0; 4.0; -18.0; -20.0; 5.0; -4.0; -8.0; -2.0; -16.0; -9.0; -11.0; -4.0; -15.0; -1.0])); + A1[1,20]=1; + tau=0.5 + nep=DEP([A0,A1],[0,tau]); + pshifts=[-1.3+0.2im ] # shifts + prg=[-3,3,-0.0001,0.0001]; # region + ptarget=-1.1+0.00001im + nleigs_nep=NLEIGS_NEP(nep,prg,shifts=pshifts); ## Create an approximate NEP + neigs=1 + (λv,VV)=nleigs_toar(nleigs_nep,prg,neigs=neigs,target=ptarget,logger=displaylevel) + @test norm(compute_Mlincomb(nep,λv[1],VV[:,1]))<1e-5 +end