From c51249e5a69ba09f433936242949b5b5623ffeb6 Mon Sep 17 00:00:00 2001 From: cz Date: Tue, 29 Aug 2023 22:49:09 +0800 Subject: [PATCH 01/17] fix bugs --- src/algorithms/utr.jl | 9 +++-- src/utilities/lanczos.jl | 54 +++++++++++++++++------------ test/cutest-benchmark/test_setup.jl | 8 ++--- 3 files changed, 42 insertions(+), 29 deletions(-) diff --git a/src/algorithms/utr.jl b/src/algorithms/utr.jl index c7b8465..32e739f 100644 --- a/src/algorithms/utr.jl +++ b/src/algorithms/utr.jl @@ -140,7 +140,7 @@ function iterate_evolve_lanczos( while true # use evolving subspaces state.α = 1.0 - k₁ = (n <= 500) ? round(Int, n * 0.9) : round(Int, n * 0.4) + k₁ = (n <= 500) ? round(Int, n * 0.9) : round(Int, n * 0.02) Ξ = 1e-1 * min(state.∇f |> norm |> sqrt, 1e0) @debug "minimum subspace size $k₁" if iter.hvp === nothing @@ -295,7 +295,6 @@ function Base.iterate( # dec radius Δ /= γ₂ Df /= γ₁ - continue end if ρₐ > 0.9 @@ -322,6 +321,7 @@ function Base.iterate( state.status = true # @info ρₐ state.k += 1 + checknan(state) return state, state end @@ -340,6 +340,11 @@ function counting(iter::T, state::S) where {T<:UTRIteration,S<:UTRState} end end +function checknan(state::S) where {S<:UTRState} + if any(isnan, state.x) + throw(ErrorException("NaN detected in Lanczos, use debugging to fix")) + end +end function utr_display(k, state::UTRState) if k == 1 || mod(k, 30) == 0 diff --git a/src/utilities/lanczos.jl b/src/utilities/lanczos.jl index 3244569..f17d685 100644 --- a/src/utilities/lanczos.jl +++ b/src/utilities/lanczos.jl @@ -242,7 +242,7 @@ function InexactLanczosTrustRegionBisect( k::Int=(b |> length), k₁::Int=0, ϵ=1e-4, - ϵₗ=1e-2, + ϵₗ=1e-4, Ξ=1e-1, bool_reorth::Bool=true, bool_interior::Bool=false @@ -251,10 +251,9 @@ function InexactLanczosTrustRegionBisect( xₖ = zeros(lc.n) λₖ = 0.0 j = lc.j - - + k = min(k, b |> length) while true - if j <= k - 1 # degree not desired; expand + if j <= k #- 1 # degree not desired; expand if j == 1 lc.V[:, 1] = copy(b) / norm(b) u = A(lc.V[:, j]) @@ -269,21 +268,27 @@ function InexactLanczosTrustRegionBisect( end end γⱼ = norm(u) - lc.V[:, j+1] = u / γⱼ lc.γ[j] = γⱼ - j += 1 lc.j = j + (j <= k - 1) && (lc.V[:, j+1] = u / γⱼ) + j += 1 if (j < k₁) && (γⱼ > 1e-1) continue end end + # now compute solution T = SymTridiagonal( - view(lc.α, 1:j), view(lc.γ, 1:j-1) + view(lc.α, 1:j-1), view(lc.γ, 1:j-1) ) + σ * I - V = view(lc.V, :, 1:j) + V = view(lc.V, :, 1:j-1) + + # @debug T[end-1:end, end-1:end] T |> size V |> size + if any(isnan, T) + throw(ErrorException("NaN detected in Lanczos, use debugging to fix")) + end # a mild estimate # minimum eigenvalue λ₁ = eigvals(T, 1:1)[] @@ -295,7 +300,7 @@ function InexactLanczosTrustRegionBisect( Sₖ = ldlt(T + λₖ * I) dₖ = Sₖ \ bₜ dₙ = (dₖ |> norm) - + # @debug "start at $j $λₗ $λᵤ" code = 0 if dₙ <= Δ # check if minimum λ produces an interior point @@ -312,6 +317,8 @@ function InexactLanczosTrustRegionBisect( else λᵤ *= 2 end + @debug "guess $λᵤ $dₙ $Δ" + break end # @debug begin # """ @@ -321,17 +328,17 @@ function InexactLanczosTrustRegionBisect( # end # otherwise, we initialize a search procedure # using bisection (sway to λₗ) - while abs(λᵤ - λₗ) > ϵₗ + while abs(λᵤ - λₗ) > ϵₗ * λₗ λₖ = λₗ * 0.5 + λᵤ * 0.5 Sₖ = ldlt(T + λₖ * I) dₖ = Sₖ \ bₜ dₙ = dₖ |> norm kₗ += 1 - # @debug begin - # """ - # $dₙ $λₖ $λₗ $λᵤ - # """ - # end + @debug begin + """ + $dₙ $λₖ $λₗ $λᵤ + """ + end if dₙ <= Δ - ϵ bool_interior && break λᵤ = λₖ @@ -345,18 +352,19 @@ function InexactLanczosTrustRegionBisect( code = 1 end εₙ = (A(xₖ) + (σ + λₖ) * xₖ - b) |> norm - bool_acc = (max(min(dₙ^2, 1) * Ξ, 1e-6) >= εₙ) || (lc.γ[j-1] < tol) || (j >= k) - @debug """ - try Lanczos size κ $j minimum: $k₁ - - Lanczos residual: εₗ := $γⱼ; - - Newton residual: εₙ := $(εₙ / max(dₙ^2, 1)) ∝ $εₙ / $(dₙ^2) - """ + bool_acc = (max(min(dₙ^2 * Ξ, 1e-1), 1e-3) >= εₙ) || (lc.γ[j-1] < tol) || (j >= k) + # @debug """ + # try Lanczos size κ $j minimum: $k₁ + # - Lanczos residual: εₗ := $γⱼ; + # - Newton residual: εₙ := $(εₙ / max(dₙ^2, 1)) ∝ $εₙ / $(dₙ^2) + # """ ((mod(j, 20) == 0) || bool_acc) && @debug begin """ - terminated. + periodic check + - acc? := $bool_acc - κ := $j, code: $code (1-direct pass, 0-search over λₖ) - εₗ := $γⱼ; - - εₙ := $(εₙ / max(dₙ^2, 1)) ∝ $εₙ / $(dₙ^2) + - εₙ := $εₙ ∝ $εₙ / $(dₙ^2) ~ $Ξ => $(max(min(dₙ^2, 1) * Ξ, 1e-6)) - ls := $kₗ - λₖ := $λₖ >= $λ₁ =: λ₁(H) - |d| := $dₙ <= $Δ =: Δ diff --git a/test/cutest-benchmark/test_setup.jl b/test/cutest-benchmark/test_setup.jl index b76b568..0507c3a 100644 --- a/test/cutest-benchmark/test_setup.jl +++ b/test/cutest-benchmark/test_setup.jl @@ -2,7 +2,7 @@ include("../tools.jl") include("./hsodm_paper_test.jl") -log_freq = 200 +log_freq = 1 precision = tol_grad = 1e-5 max_iter = 20000 max_time = 200.0 @@ -33,7 +33,7 @@ filter_cutest_problem(nlp) = true # filter_optimization_method(k) = k ∈ [:LBFGS, :HSODM] # filter_optimization_method(k) = k ∈ [:DRSOM, :DRSOMHomo] # filter_optimization_method(k) = k == :HSODM -filter_optimization_method(k) = k == [:UTR, :iUTR] +filter_optimization_method(k) = k ∈ [:iUTR] # filter_optimization_method(k) = k == :ARC # filter_optimization_method(k) = k == :HSODMArC # filter_optimization_method(k) = k ∈ [:HSODM, :DRSOMHomo] @@ -44,8 +44,8 @@ filter_optimization_method(k) = k == [:UTR, :iUTR] # choose problem set # PROBLEMS = UNC_PROBLEMS_221104 # PROBLEMS = TEST -PROBLEMS = UNC_PROBLEMS_4to200 -# PROBLEMS = UNC_PROBLEMS_200to5000 +# PROBLEMS = UNC_PROBLEMS_4to200 +PROBLEMS = UNC_PROBLEMS_200to5000 if test_before_start ###################################################################### From 4e9c600c3ac9f922582217b5f93baa9ce0aae60b Mon Sep 17 00:00:00 2001 From: cz Date: Wed, 30 Aug 2023 16:28:06 +0800 Subject: [PATCH 02/17] add TRST (Steihaug-Toint CG) --- src/algorithms/utr.jl | 28 +++++++--- src/utilities/lanczos.jl | 2 +- test/convex/test_logistic.jl | 82 +++++++++++++++-------------- test/convex/test_soft_maximum.jl | 6 +-- test/cutest-benchmark/test_setup.jl | 25 ++++++++- test/tools.jl | 36 ++++++++++++- 6 files changed, 124 insertions(+), 55 deletions(-) diff --git a/src/algorithms/utr.jl b/src/algorithms/utr.jl index 32e739f..bdd8813 100644 --- a/src/algorithms/utr.jl +++ b/src/algorithms/utr.jl @@ -150,7 +150,7 @@ function iterate_evolve_lanczos( -state.∇f, Δ, Sₗ; - σ=σ, + σ=σ * (σ >= 1e-8), k=Sₗ.k, k₁=k₁, Ξ=Ξ @@ -162,7 +162,7 @@ function iterate_evolve_lanczos( -state.∇f, Δ, Sₗ; - σ=σ, + σ=σ * (σ >= 1e-8), k=Sₗ.k, k₁=k₁, Ξ=Ξ @@ -178,10 +178,20 @@ function iterate_evolve_lanczos( df = fz - fx ρₐ = df / dq k₂ += 1 - @debug """inner""" v |> norm, Δ, θ, λ₁, info.kₗ, df, ρₐ, k₁ + @debug """periodic check (main iterate) + |d|: $(v |> norm):, + Ξ: $Ξ, + Δ: $Δ, + σ: $σ, + θ: $θ, + λ₁: $λ₁, + kᵢ: $info.kₗ, k₁: $k₁, + df: $df, + ρₐ: $ρₐ + """ Δ = min(Δ, v |> norm) if (Δ > 1e-8) && ((df < 0) || ((df < Df) && (ρₐ < 0.2))) # not satisfactory - if abs(λ₁) >= 1e-8 # too cvx or ncvx + if abs(λ₁) >= -1e-8 # too cvx or ncvx σ = 0.0 else σ *= γ₁ @@ -189,7 +199,6 @@ function iterate_evolve_lanczos( # dec radius Δ /= γ₂ Df /= γ₁ - continue end if ρₐ > 0.9 @@ -197,7 +206,7 @@ function iterate_evolve_lanczos( Δ *= γ₂ end # do this when accept - state.σ = max(1e-8, σ / grad_regularizer) + state.σ = max(1e-18, σ / grad_regularizer) state.r = max(Δ / grad_regularizer, 1e-1) state.k₂ += k₂ state.kₜ = k₂ @@ -284,7 +293,12 @@ function Base.iterate( dq = -state.α^2 * v'H * v / 2 - state.α * v'state.∇f ρₐ = df / dq k₂ += 1 - @debug """inner""" v |> norm, Δ, θ, λ₁, kᵢ, df, ρₐ + @debug """periodic check (main iterate) + |d|: $(v |> norm):, Δ: $Δ, + θ: $θ, λ₁: $λ₁, + kᵢ: $kᵢ, df: $df, + ρₐ: $ρₐ + """ Δ = min(Δ, v |> norm) if (Δ > 1e-8) && ((df < 0) || ((df < Df) && (ρₐ < 0.2))) # not satisfactory if abs(λ₁) >= 1e-8 # too cvx or ncvx diff --git a/src/utilities/lanczos.jl b/src/utilities/lanczos.jl index f17d685..cd5b8cb 100644 --- a/src/utilities/lanczos.jl +++ b/src/utilities/lanczos.jl @@ -352,7 +352,7 @@ function InexactLanczosTrustRegionBisect( code = 1 end εₙ = (A(xₖ) + (σ + λₖ) * xₖ - b) |> norm - bool_acc = (max(min(dₙ^2 * Ξ, 1e-1), 1e-3) >= εₙ) || (lc.γ[j-1] < tol) || (j >= k) + bool_acc = (max(min(dₙ^2 * Ξ, 1e-1), 1e-8) >= εₙ) || (lc.γ[j-1] < tol) || (j >= k) # @debug """ # try Lanczos size κ $j minimum: $k₁ # - Lanczos residual: εₗ := $γⱼ; diff --git a/test/convex/test_logistic.jl b/test/convex/test_logistic.jl index 1b42df6..f2ce9e6 100644 --- a/test/convex/test_logistic.jl +++ b/test/convex/test_logistic.jl @@ -39,20 +39,20 @@ using .LP using LoopVectorization using LIBSVMFileIO -bool_opt = false +bool_opt = true bool_plot = false bool_q_preprocessed = true f1(A, d=2) = sqrt.(sum(abs2.(A), dims=d)) ε = 1e-6 # * max(g(x0) |> norm, 1) -λ = 1e-7 +λ = 1e-5 if bool_q_preprocessed # name = "a4a" # name = "a9a" # name = "w4a" - name = "covtype" + # name = "covtype" # name = "news20" - # name = "rcv1" + name = "rcv1" X, y = libsvmread("test/instances/libsvm/$name.libsvm"; dense=false) Xv = hcat(X...)' @@ -178,41 +178,41 @@ if bool_opt time_limit=500 ) - rn1 = PFH(name=Symbol("iNewton-1e-7"))(; - x0=copy(x0), f=loss, g=g, hvp=hvpdiff, - maxiter=40, tol=ε, freq=1, - step=:newton, μ₀=0.0, - maxtime=500, linesearch=:backtrack, - bool_trace=true, - eigtol=1e-7, - direction=:warm - ) - rn2 = PFH(name=Symbol("iNewton-1e-8"))(; - x0=copy(x0), f=loss, g=g, hvp=hvpdiff, - maxiter=60, tol=ε, freq=1, - step=:newton, μ₀=0.0, - maxtime=500, linesearch=:backtrack, - bool_trace=true, - eigtol=1e-8, - direction=:warm - ) - rn3 = PFH(name=Symbol("iNewton-1e-9"))(; - x0=copy(x0), f=loss, g=g, hvp=hvpdiff, - maxiter=60, tol=ε, freq=1, - step=:newton, μ₀=0.0, - maxtime=500, linesearch=:backtrack, - bool_trace=true, - eigtol=1e-9, - direction=:warm - ) + # rn1 = PFH(name=Symbol("iNewton-1e-7"))(; + # x0=copy(x0), f=loss, g=g, hvp=hvpdiff, + # maxiter=40, tol=ε, freq=1, + # step=:newton, μ₀=0.0, + # maxtime=500, linesearch=:backtrack, + # bool_trace=true, + # eigtol=1e-7, + # direction=:warm + # ) + # rn2 = PFH(name=Symbol("iNewton-1e-8"))(; + # x0=copy(x0), f=loss, g=g, hvp=hvpdiff, + # maxiter=60, tol=ε, freq=1, + # step=:newton, μ₀=0.0, + # maxtime=500, linesearch=:backtrack, + # bool_trace=true, + # eigtol=1e-8, + # direction=:warm + # ) + # rn3 = PFH(name=Symbol("iNewton-1e-9"))(; + # x0=copy(x0), f=loss, g=g, hvp=hvpdiff, + # maxiter=60, tol=ε, freq=1, + # step=:newton, μ₀=0.0, + # maxtime=500, linesearch=:backtrack, + # bool_trace=true, + # eigtol=1e-9, + # direction=:warm + # ) - ra = HSODM(name=Symbol("adaptive-HSODM"))(; - x0=copy(x0), f=loss, g=g, hvp=hvpdiff, - maxiter=10000, tol=ε, freq=1, - maxtime=500, - direction=:warm, linesearch=:hagerzhang, - adaptive=:none - ) + # ra = HSODM(name=Symbol("adaptive-HSODM"))(; + # x0=copy(x0), f=loss, g=g, hvp=hvpdiff, + # maxiter=10000, tol=ε, freq=1, + # maxtime=500, + # direction=:warm, linesearch=:hagerzhang, + # adaptive=:none + # ) rh = PFH(name=Symbol("PF-HSODM"))(; x0=copy(x0), f=loss, g=g, hvp=hvpdiff, @@ -226,7 +226,7 @@ if bool_opt ru = UTR(name=Symbol("Universal-TRS"))(; x0=copy(x0), f=loss, g=g, H=H, maxiter=10000, tol=1e-6, freq=1, - direction=:warm, subpstrategy=:direct = 1 + direction=:warm, subpstrategy=:lanczos ) end @@ -239,7 +239,8 @@ if bool_plot rh, rn1, rn2, - rn3 + rn3, + ru ] linestyles = [:dash, :dot, :dashdot, :dashdotdot] method_names = [ @@ -248,6 +249,7 @@ if bool_plot L"\texttt{iNewton}-$10^{-7}$", L"\texttt{iNewton}-$10^{-8}$", L"\texttt{iNewton}-$10^{-9}$", + L"\texttt{Universal-TR}" ] for xaxis in (:t, :k) for metric in (:ϵ, :fx) diff --git a/test/convex/test_soft_maximum.jl b/test/convex/test_soft_maximum.jl index 0686522..a01a752 100644 --- a/test/convex/test_soft_maximum.jl +++ b/test/convex/test_soft_maximum.jl @@ -42,8 +42,8 @@ using .LP using LIBSVMFileIO bool_plot = false -bool_opt = false -bool_setup = true +bool_opt = true +# bool_setup = true if bool_setup Random.seed!(2) @@ -119,7 +119,7 @@ if bool_opt ru = UTR(;)(; x0=copy(x0), f=loss, g=grad, H=hess, maxiter=10000, tol=1e-6, freq=1, - direction=:warm, subpstrategy=:direct = 1 + direction=:warm, subpstrategy=:direct ) end diff --git a/test/cutest-benchmark/test_setup.jl b/test/cutest-benchmark/test_setup.jl index 0507c3a..d00a42b 100644 --- a/test/cutest-benchmark/test_setup.jl +++ b/test/cutest-benchmark/test_setup.jl @@ -6,7 +6,7 @@ log_freq = 1 precision = tol_grad = 1e-5 max_iter = 20000 max_time = 200.0 -test_before_start = false +test_before_start = true ########################################## @@ -33,8 +33,9 @@ filter_cutest_problem(nlp) = true # filter_optimization_method(k) = k ∈ [:LBFGS, :HSODM] # filter_optimization_method(k) = k ∈ [:DRSOM, :DRSOMHomo] # filter_optimization_method(k) = k == :HSODM -filter_optimization_method(k) = k ∈ [:iUTR] +# filter_optimization_method(k) = k ∈ [:iUTRhvp] # filter_optimization_method(k) = k == :ARC +filter_optimization_method(k) = k == :TRST # filter_optimization_method(k) = k == :HSODMArC # filter_optimization_method(k) = k ∈ [:HSODM, :DRSOMHomo] # filter_optimization_method(k) = k ∈ [:DRSOM, :HSODM, :DRSOMHomo] @@ -91,6 +92,26 @@ if test_before_start r = wrapper_hsodm(x0, loss, g, H, options_drsom) @test r.state.ϵ < 1e-4 end + @testset "UTR" begin + options_drsom = Dict( + :maxiter => max_iter, + :maxtime => max_time, + :tol => 1e-5, + :freq => log_freq + ) + r = wrapper_utr(x0, loss, g, H, options_drsom) + @test r.state.ϵ < 1e-4 + end + @testset "UTR" begin + options_drsom = Dict( + :maxiter => max_iter, + :maxtime => max_time, + :tol => 1e-5, + :freq => log_freq + ) + r = wrapper_iutr_hvp(x0, loss, g, H, options_drsom; hvp=hvp) + @test r.state.ϵ < 1e-4 + end finalize(nlp) end end \ No newline at end of file diff --git a/test/tools.jl b/test/tools.jl index c9192db..c43448a 100644 --- a/test/tools.jl +++ b/test/tools.jl @@ -140,6 +140,28 @@ function wrapper_arc(nlp) return arc_to_result(nlp, stats, "ARC") end +function wrapper_tr_st(nlp) + reset!(nlp) + stats = ST_TROp( + nlp, + max_time=max_time, + max_iter=max_iter, + max_eval=typemax(Int64), + verbose=true + # atol=atol, + # rtol=rtol, + # @note: how to set |g|? + ) + # AdaptiveRegularization.jl to my style of results + return arc_to_result(nlp, stats, "TRST") +end + + + + +########################################################## +# MY VARIANTS +########################################################## alg_drsom = DRSOM2() wrapper_drsom(x, loss, g, H, options; kwargs...) = @@ -190,7 +212,7 @@ wrapper_hsodm(x, loss, g, H, options; kwargs...) = alg_hsodm_hvp = HSODM() wrapper_hsodm_hvp(x, loss, g, H, options; kwargs...) = alg_hsodm_hvp(; - x0=copy(x), f=loss, g=g, H=H, + x0=copy(x), f=loss, g=g, linesearch=:hagerzhang, direction=:warm, kwargs..., @@ -220,6 +242,14 @@ wrapper_iutr(x, loss, g, H, options; kwargs...) = options... ) +wrapper_iutr_hvp(x, loss, g, H, options; kwargs...) = + alg_utr(; + x0=copy(x), f=loss, g=g, + subpstrategy=:lanczos, + kwargs..., + options... + ) + # My solvers and those in Optim.jl @@ -232,6 +262,7 @@ MY_OPTIMIZERS = Dict( # :HSODMArC => wrapper_hsodm_arc, :UTR => wrapper_utr, :iUTR => wrapper_iutr, + :iUTRhvp => wrapper_iutr_hvp, ) OPTIMIZERS_OPTIM = Dict( @@ -243,5 +274,6 @@ OPTIMIZERS_OPTIM = Dict( # solvers in AdaptiveRegularization.jl OPTIMIZERS_NLP = Dict( - :ARC => wrapper_arc + :ARC => wrapper_arc, # adaptive cubic regularization + :TRST => wrapper_tr_st # trust-region with Steihaug–Toint CG ) From a8da1edb8187cfa1def4e57d5a357f67bf99d2f5 Mon Sep 17 00:00:00 2001 From: cz Date: Fri, 1 Sep 2023 12:42:38 +0800 Subject: [PATCH 03/17] add Mishchenko's strategy --- README.md | 52 ++--- src/algorithms/pfh.jl | 2 +- src/algorithms/utr.jl | 334 +++++++++++++++++----------- src/utilities/trustregion.jl | 21 +- test/convex/test_logistic.jl | 20 +- test/convex/test_soft_maximum.jl | 26 +-- test/cutest-benchmark/test_setup.jl | 8 +- 7 files changed, 264 insertions(+), 199 deletions(-) diff --git a/README.md b/README.md index 361fba6..8255612 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,25 @@ DRSOM.jl is a Julia implementation of the Dimension-Reduced Second-Order Method for unconstrained smooth optimization. -DRSOM.jl now includes a bunch of algorithms, including the variants of original DRSOM and the HSODM: *Homogeneous Second-order Descent Method*. +**DRSOM.jl now includes a bunch of other algorithms, beyond the original `DRSOM`**: +- Homogeneous Second-order Descent Method (`HSODM`). +- Homotopy Path-Following HSODM (`PFH`) +- Universal Trust-Region Method (`UTR`) + +## Reference +You are welcome to cite our papers : ) +``` +1. He, C., Jiang, Y., Zhang, C., Ge, D., Jiang, B., Ye, Y.: Homogeneous Second-Order Descent Framework: A Fast Alternative to Newton-Type Methods, http://arxiv.org/abs/2306.17516, (2023) +2. Zhang, C., Ge, D., He, C., Jiang, B., Jiang, Y., Xue, C., Ye, Y.: A Homogeneous Second-Order Descent Method for Nonconvex Optimization, http://arxiv.org/abs/2211.08212, (2022) +3. Zhang, C., Ge, D., He, C., Jiang, B., Jiang, Y., Ye, Y.: DRSOM: A Dimension Reduced Second-Order Method, http://arxiv.org/abs/2208.00208, (2022) +``` + +## Developer + +- Chuwen Zhang +- Yinyu Ye + + ## Known issues `DRSOM.jl` is still under active development. Please add issues on GitHub. @@ -23,35 +41,3 @@ DRSOM.jl now includes a bunch of algorithms, including the variants of original ## Acknowledgment - Special thanks go to the COPT team and Tianyi Lin [(Darren)](https://tydlin.github.io/) for helpful suggestions. - -## Developer - -- Chuwen Zhang -- Yinyu Ye - -## Reference -You are welcome to cite our paper on DRSOM :) -```bibtex -@misc{zhang_drsom_2023, - title = {{DRSOM}: {A} {Dimension} {Reduced} {Second}-{Order} {Method}}, - url = {http://arxiv.org/abs/2208.00208}, - doi = {10.48550/arXiv.2208.00208}, - author = {Zhang, Chuwen and Ge, Dongdong and He, Chang and Jiang, Bo and Jiang, Yuntian and Ye, Yinyu}, - month = jan, - year = {2023}, - note = {arXiv:2208.00208 [cs, math]}, -} -``` -and HSODM, -```bibtex -@misc{zhang_homogenous_2022, - title = {A {Homogenous} {Second}-{Order} {Descent} {Method} for {Nonconvex} {Optimization}}, - url = {http://arxiv.org/abs/2211.08212}, - publisher = {arXiv}, - author = {Zhang, Chuwen and Ge, Dongdong and He, Chang and Jiang, Bo and Jiang, Yuntian and Xue, Chenyu and Ye, Yinyu}, - month = nov, - year = {2022}, - note = {arXiv:2211.08212 [math]}, - keywords = {Mathematics - Optimization and Control} -} -``` \ No newline at end of file diff --git a/src/algorithms/pfh.jl b/src/algorithms/pfh.jl index ed69d33..6d1fa7d 100644 --- a/src/algorithms/pfh.jl +++ b/src/algorithms/pfh.jl @@ -156,7 +156,6 @@ function Base.iterate(iter::PFHIteration, state::PFHState{R,Tx}) where {R,Tx} B, iter, state, iter.adaptive_param; verbose=iter.verbose > 1 ) else - # H = iter.H(state.x) # B = Symmetric([H state.∇fμ; state.∇fμ' -state.μ]) # q = randn((state.x |> length) + 1) @@ -321,6 +320,7 @@ function Base.show(io::IO, t::T) where {T<:PFHIteration} @printf io " unknown mode to compute Hessian info\n" throw(ErrorException("unknown differentiation mode\n")) end + (t.μ₀ == 0) && @printf io " !!! μ₀ ≈ 0, reduce to inexact Newton Method\n" println(io, "-"^length(t.LOG_SLOTS)) flush(io) end diff --git a/src/algorithms/utr.jl b/src/algorithms/utr.jl index bdd8813..f0ff171 100644 --- a/src/algorithms/utr.jl +++ b/src/algorithms/utr.jl @@ -1,4 +1,3 @@ - using Base.Iterators using LinearAlgebra using Printf @@ -22,6 +21,7 @@ Base.@kwdef mutable struct UTRIteration{Tx,Tf,Tϕ,Tg,TH,Th} H::TH = nothing # hessian function x0::Tx # initial point t::Dates.DateTime = Dates.now() + σ₀::Float64 = 1e-3 adaptive_param = AR() # todo eigtol::Float64 = 1e-9 itermax::Int64 = 20 @@ -29,7 +29,10 @@ Base.@kwdef mutable struct UTRIteration{Tx,Tf,Tϕ,Tg,TH,Th} linesearch = :none adaptive = :none verbose::Int64 = 1 + mainstrategy = :utr subpstrategy = :direct + adaptiverule = :utr + initializerule = :mishchenko LOG_SLOTS::String = UTR_LOG_SLOTS ALIAS::String = "UTR" DESC::String = "Universal Trust-Region Method" @@ -58,8 +61,8 @@ Base.@kwdef mutable struct UTRState{R,Tx} df::R = 0.0 # decrease of the real function value ρ::R = 0.0 # trs descrease ratio: ρ = df/dq ϵ::R = 0.0 # eps 2: residual for gradient - r::R = 1e1 # universal trust-region radius parameter r - σ::R = 1e-1 # universal trust-region regularization parameter σ + r::R = 1e2 # universal trust-region radius parameter r + σ::R = 1e-3 # universal trust-region regularization parameter σ k::Int = 1 # outer iterations kᵥ::Int = 1 # krylov iterations kₜ::Int = 1 # inner iterations @@ -71,7 +74,6 @@ Base.@kwdef mutable struct UTRState{R,Tx} k₂::Int = 0 # 2 oracle evaluations end - @doc raw""" Initialize the state, change of behavior: do not optimize at the first (0th) iterate. @@ -94,13 +96,13 @@ function Base.iterate(iter::UTRIteration) ∇fb=Hv, ∇fz=grad_f_x, ϵ=gₙ, - Δ=gₙ * 1e1 + Δ=gₙ * 1e1, + σ=iter.σ₀ ) if iter.hvp === nothing return state, state end - function hvp(v) iter.hvp(state.x, v, Hv) return Hv @@ -110,10 +112,29 @@ function Base.iterate(iter::UTRIteration) end -function iterate_evolve_lanczos( +function Base.iterate( + iter::UTRIteration, + state::UTRState{R,Tx}; +) where {R,Tx} + # use inexact method (a Lanczos method) + if (iter.subpstrategy == :direct) + return iterate_cholesky(iter, state) + elseif (iter.subpstrategy == :lanczos) + return iterate_evolve_lanczos(iter, state) + else + throw(ErrorException(""" + unsupported mode $(iter.subpstrategy), + currently: {:lanczos, :direct} + """ + )) + end +end + +function iterate_cholesky( iter::UTRIteration, state::UTRState{R,Tx}; ) where {R,Tx} + state.z = z = state.x state.fz = fz = state.fx state.∇fz = state.∇f @@ -122,91 +143,88 @@ function iterate_evolve_lanczos( grad_regularizer = state.ϵ |> sqrt decs_regularizer = grad_regularizer^3 - n = (state.x |> length) - + if iter.hvp === nothing + H = Symmetric(iter.H(state.x)) + else + throw( + ErrorException("only support Hessian mode for direct method") + ) + end k₂ = 0 - γ₁ = 8.0 + γ₁ = 2.0 γ₂ = 2.0 η = 1.0 ρ = 1.0 - σ = state.σ * grad_regularizer - Δ = max(state.r * grad_regularizer, 1e-1) - + # initialize an estimation + if iter.initializerule == :mishchenko + σ₀, _, Df, bool_acc = initial_rules_mishchenko( + state, iter.g, H, iter.H, + state.σ + ) + σ = σ₀ / 2 + r = 1 / 3 / σ₀ + σ₀ = σ₀ * grad_regularizer + else + σ = σ₀ = state.σ + r = max(state.r, 1e-1) + end + σ = σ * grad_regularizer + Δ = r * grad_regularizer Df = (η / ρ) * decs_regularizer # initialize n = state.∇f |> length - Sₗ = DRSOM.Lanczos(n, 2n + 1, state.∇f) + θ = 0.0 while true - # use evolving subspaces - state.α = 1.0 - k₁ = (n <= 500) ? round(Int, n * 0.9) : round(Int, n * 0.02) - Ξ = 1e-1 * min(state.∇f |> norm |> sqrt, 1e0) - @debug "minimum subspace size $k₁" - if iter.hvp === nothing - H = iter.H(state.x) - v, θ, info = DRSOM.InexactLanczosTrustRegionBisect( - H, - -state.∇f, - Δ, - Sₗ; - σ=σ * (σ >= 1e-8), - k=Sₗ.k, - k₁=k₁, - Ξ=Ξ - ) - dq = -state.α^2 * v'H * v / 2 - state.α * v'state.∇f + if iter.mainstrategy == :newton + # if you use Regularized Newton, + # make sure it is convex optimization + F = cholesky(H + σ₀ * I) + v = F \ (-state.∇f) + kᵢ = 1 + θ = λ₁ = 0.0 else - v, θ, info = DRSOM.InexactLanczosTrustRegionBisect( - iter.ff, - -state.∇f, - Δ, - Sₗ; - σ=σ * (σ >= 1e-8), - k=Sₗ.k, - k₁=k₁, - Ξ=Ξ + # if not accepted + # λ (dual) must increase + v, θ, λ₁, kᵢ = TrustRegionCholesky( + H + σ * I, + state.∇f, + 1e3; + # λₗ=θ - σ ) - dq = -state.α^2 * v'iter.ff(v) / 2 - state.α * v'state.∇f + Δ = min(Δ, v |> norm) end - - λ₁ = info.λ₁ - + state.α = 1.0 fx = iter.f(state.z + v * state.α) # summarize x = y = state.z + v * state.α df = fz - fx + dq = -state.α^2 * v'H * v / 2 - state.α * v'state.∇f ρₐ = df / dq k₂ += 1 @debug """periodic check (main iterate) - |d|: $(v |> norm):, - Ξ: $Ξ, - Δ: $Δ, - σ: $σ, - θ: $θ, - λ₁: $λ₁, - kᵢ: $info.kₗ, k₁: $k₁, - df: $df, + |d|: $(v |> norm):, Δ: $Δ, + θ: $θ, λₗ: $λₗ, + kᵢ: $kᵢ, df: $df, ρₐ: $ρₐ """ - Δ = min(Δ, v |> norm) - if (Δ > 1e-8) && ((df < 0) || ((df < Df) && (ρₐ < 0.2))) # not satisfactory - if abs(λ₁) >= -1e-8 # too cvx or ncvx - σ = 0.0 - else - σ *= γ₁ - end - # dec radius - Δ /= γ₂ - Df /= γ₁ - continue - end - if ρₐ > 0.9 - σ /= γ₁ - Δ *= γ₂ + if iter.adaptiverule == :utr + Δ, σ, Df, bool_acc = adaptive_rules_utr( + state, df, + Df, Δ, σ, ρₐ, γ₁, γ₂, + θ, λ₁ - σ + ) + elseif iter.adaptiverule ∈ [:constant, :mishchenko] + # do nothing + bool_acc = true + else + throw( + ErrorException("unrecognized adaptive mode $(iter.adaptiverule)") + ) end + !bool_acc && continue # do this when accept - state.σ = max(1e-18, σ / grad_regularizer) + state.σ = max(1e-12, σ / grad_regularizer) state.r = max(Δ / grad_regularizer, 1e-1) state.k₂ += k₂ state.kₜ = k₂ @@ -225,27 +243,15 @@ function iterate_evolve_lanczos( state.status = true # @info ρₐ state.k += 1 + checknan(state) return state, state end end - - - -function Base.iterate( +function iterate_evolve_lanczos( iter::UTRIteration, state::UTRState{R,Tx}; ) where {R,Tx} - # use inexact method (a Lanczos method) - if (iter.subpstrategy != :direct) - # if n < 2e3 - # return iterate_full_lanczos(iter, state) - # else - # use inexact method of evolving Lanczos - return iterate_evolve_lanczos(iter, state) - # end - end - state.z = z = state.x state.fz = fz = state.fx state.∇fz = state.∇f @@ -254,15 +260,10 @@ function Base.iterate( grad_regularizer = state.ϵ |> sqrt decs_regularizer = grad_regularizer^3 - if iter.hvp === nothing - H = iter.H(state.x) - else - throw( - ErrorException("only support Hessian mode for direct method") - ) - end + n = (state.x |> length) + k₂ = 0 - γ₁ = 2.0 + γ₁ = 8.0 γ₂ = 2.0 η = 1.0 ρ = 1.0 @@ -273,50 +274,66 @@ function Base.iterate( Df = (η / ρ) * decs_regularizer # initialize n = state.∇f |> length - # dual estimate - λ₁ = 0.0 - θ = σ + Sₗ = DRSOM.Lanczos(n, 2n + 1, state.∇f) while true - # if not accepted - # λ (dual) must increase - v, θ, λ₁, kᵢ = TrustRegionCholesky( - H + σ * I, - state.∇f, - Δ; - λ₁=θ - σ - ) + if iter.mainstrategy == :newton + throw(ErrorException("we do not support regularized Newton in Lanczos mode; use Cholesky instead")) + end + # use evolving subspaces state.α = 1.0 + k₁ = (n <= 500) ? round(Int, n * 0.9) : round(Int, n * 0.02) + Ξ = 1e-1 * min(state.∇f |> norm |> sqrt, 1e0) + @debug "minimum subspace size $k₁" + if iter.hvp === nothing + H = iter.H(state.x) + v, θ, info = DRSOM.InexactLanczosTrustRegionBisect( + H, + -state.∇f, + Δ, + Sₗ; + σ=σ * (σ >= 1e-8), + k=Sₗ.k, + k₁=k₁, + Ξ=Ξ + ) + dq = -state.α^2 * v'H * v / 2 - state.α * v'state.∇f + else + v, θ, info = DRSOM.InexactLanczosTrustRegionBisect( + iter.ff, + -state.∇f, + Δ, + Sₗ; + σ=σ * (σ >= 1e-8), + k=Sₗ.k, + k₁=k₁, + Ξ=Ξ + ) + dq = -state.α^2 * v'iter.ff(v) / 2 - state.α * v'state.∇f + end + + λ₁ = info.λ₁ fx = iter.f(state.z + v * state.α) # summarize x = y = state.z + v * state.α df = fz - fx - dq = -state.α^2 * v'H * v / 2 - state.α * v'state.∇f ρₐ = df / dq k₂ += 1 @debug """periodic check (main iterate) - |d|: $(v |> norm):, Δ: $Δ, - θ: $θ, λ₁: $λ₁, - kᵢ: $kᵢ, df: $df, + |d|: $(v |> norm):, + Ξ: $Ξ, + Δ: $Δ, + σ: $σ, + θ: $θ, + λ₁: $λ₁, + kᵢ: $info.kₗ, k₁: $k₁, + df: $df, ρₐ: $ρₐ """ Δ = min(Δ, v |> norm) - if (Δ > 1e-8) && ((df < 0) || ((df < Df) && (ρₐ < 0.2))) # not satisfactory - if abs(λ₁) >= 1e-8 # too cvx or ncvx - σ = 0.0 - else - σ *= γ₁ - end - # dec radius - Δ /= γ₂ - Df /= γ₁ - continue - end - if ρₐ > 0.9 - σ /= γ₁ - Δ *= γ₂ - end + Δ, σ, Df, bool_acc = adaptive_rules_utr(state, df, Df, Δ, σ, ρₐ, γ₁, γ₂, θ, λ₁) + !bool_acc && continue # do this when accept - state.σ = max(1e-12, σ / grad_regularizer) + state.σ = max(1e-18, σ / grad_regularizer) state.r = max(Δ / grad_regularizer, 1e-1) state.k₂ += k₂ state.kₜ = k₂ @@ -335,12 +352,71 @@ function Base.iterate( state.status = true # @info ρₐ state.k += 1 - checknan(state) return state, state end +end +@doc """ + implement the strategy of Mishchenko [Algorithm 2.3, AdaN+](SIOPT, 2023) + this is always accepting method +""" +function initial_rules_mishchenko(state, funcg, Hx, funcH, args...) + σ, _... = args + Δ = 10.0 + bool_acc = true + if state.k == 1 + # first iteration + dx = randn(Float64, state.x |> size) + y = state.x + dx + gx = state.∇f + gy = funcg(y) + M = approximate_lip(dx, gx, gy, Hx) + σ1 = √M + else + dx = state.d + gx = state.∇fz + Hx = funcH(state.z) + gy = state.∇f + M = approximate_lip(dx, gx, gy, Hx) + σ1 = max( + σ / √2, + √M + ) + end + @debug "details:" norm(dx) norm(gy - gx) σ σ1 + return σ1, M, 0.0, bool_acc end + +function adaptive_rules_utr(state, args...) + df, Df, Δ, σ, ρₐ, γ₁, γ₂, θ, λ₁, _... = args + # @info "details:" λ₁ σ + bool_acc = true + if (Δ > 1e-8) && ((df < 0) || ((df < Df) && (ρₐ < 0.2))) # not satisfactory + if abs(λ₁) >= -1e-8 # too cvx or ncvx + σ = 0.0 + else + σ *= γ₁ + end + # dec radius + Δ /= γ₂ + Df /= γ₁ + bool_acc = false + end + if ρₐ > 0.9 + σ /= γ₁ + Δ *= γ₂ + end + return Δ, σ, Df, bool_acc +end + +function approximate_lip(dx, gx, gy, Hx) + return (norm(gy - gx - Hx * dx)) / norm(dx)^2 +end + +#################################################################################################### +# Basic Tools +#################################################################################################### utr_stopping_criterion(tol, state::UTRState) = (state.ϵ <= tol) || (state.Δ <= 1e-12) @@ -356,7 +432,7 @@ end function checknan(state::S) where {S<:UTRState} if any(isnan, state.x) - throw(ErrorException("NaN detected in Lanczos, use debugging to fix")) + @warn(ErrorException("NaN detected in Lanczos, use debugging to fix")) end end @@ -373,8 +449,6 @@ end default_solution(::UTRIteration, state::UTRState) = state.x - - UniversalTrustRegion(; name=:UTR, stop=utr_stopping_criterion, @@ -426,7 +500,9 @@ function Base.show(io::IO, t::T) where {T<:UTRIteration} @printf io " algorithm alias := %s\n" t.ALIAS @printf io " algorithm description := %s\n" t.DESC @printf io " inner iteration limit := %s\n" t.itermax - @printf io " subproblem := %s\n" t.subpstrategy + @printf io " main strategy := %s\n" t.mainstrategy + @printf io " subproblem strategy := %s\n" t.subpstrategy + @printf io " adaptive (σ,Δ) rule := %s\n" t.adaptiverule if t.hvp !== nothing @printf io " second-order info := using provided Hessian-vector product\n" elseif t.H !== nothing @@ -435,6 +511,10 @@ function Base.show(io::IO, t::T) where {T<:UTRIteration} @printf io " unknown mode to compute Hessian info\n" throw(ErrorException("unknown differentiation mode\n")) end + + (t.mainstrategy == :newton) && @printf io " !!! reduce to Regularized Newton Method\n" + (t.initializerule == :mishchenko) && @printf io " !!! - use Mishchenko's strategy\n" + (t.adaptiverule == :constant) && @printf io " !!! - use fixed regularization\n" println(io, "-"^length(t.LOG_SLOTS)) flush(io) end diff --git a/src/utilities/trustregion.jl b/src/utilities/trustregion.jl index 746c591..8f386b1 100644 --- a/src/utilities/trustregion.jl +++ b/src/utilities/trustregion.jl @@ -162,11 +162,10 @@ function TrustRegionCholesky( c::Vector{Float64}, Δ::Float64=0.0; Δϵ::Float64=1e-2, - λ₁=0.0, + λₗ=0.0, λᵤ=Inf ) - - F = cholesky(Q + λ₁ * I, check=false) + F = cholesky(Q + λₗ * I, check=false) if issuccess(F) # meaning psd x = F \ (-c) if norm(x) < Δϵ + Δ @@ -176,7 +175,7 @@ function TrustRegionCholesky( # else it is indefinite. # mild estimate to ensure p.s.d if λᵤ < Inf - λₖ = (λ₁ + λᵤ) / 2 + λₖ = (λₗ + λᵤ) / 2 else λₖ = ((sum(abs, Q - Diagonal(Q), dims=1)[:] + abs.(diag(Q))) |> maximum) / 2 end @@ -192,7 +191,7 @@ function TrustRegionCholesky( end p === nothing ? F.p : p if bool_indef - λ₁ = λₖ / 1.02 + λₗ = λₖ / 1.02 end bool_indef = false @@ -209,24 +208,24 @@ function TrustRegionCholesky( dℓ = -(l1 / (l2 + 1e-2)) if dℓ < 0 λᵤ = min(λᵤ, λₖ) - α = min(0.9995 * ((λₖ - λ₁) / -dℓ), 1.0) + α = min(0.9995 * ((λₖ - λₗ) / -dℓ), 1.0) else - λ₁ = max(λ₁, λₖ) + λₗ = max(λₗ, λₖ) α = min(0.9995 * ((λᵤ - λₖ) / dℓ), 1.0) end if (α <= 0.8) && (λᵤ < Inf) - λₖ = (λᵤ + λ₁) / 2 + λₖ = (λᵤ + λₗ) / 2 else λₖ += α * dℓ end - @debug """iterate""" k, [λ₁, λᵤ], λₖ, α, dℓ, norm2_s, (Δ^2) + @debug """iterate""" k, [λₗ, λᵤ], λₖ, α, dℓ, norm2_s, (Δ^2) k += 1 - if (abs(l1) < Δϵ) || (k > 20) || ((λᵤ - λ₁) < Δϵ) + if (abs(l1) < Δϵ) || (k > 20) || ((λᵤ - λₗ) < Δϵ) break end end - return x, λₖ, λ₁, k, p + return x, λₖ, -λₗ, k, p end \ No newline at end of file diff --git a/test/convex/test_logistic.jl b/test/convex/test_logistic.jl index f2ce9e6..eb5d1e9 100644 --- a/test/convex/test_logistic.jl +++ b/test/convex/test_logistic.jl @@ -40,7 +40,7 @@ using LoopVectorization using LIBSVMFileIO bool_opt = true -bool_plot = false +bool_plot = true bool_q_preprocessed = true f1(A, d=2) = sqrt.(sum(abs2.(A), dims=d)) @@ -178,15 +178,15 @@ if bool_opt time_limit=500 ) - # rn1 = PFH(name=Symbol("iNewton-1e-7"))(; - # x0=copy(x0), f=loss, g=g, hvp=hvpdiff, - # maxiter=40, tol=ε, freq=1, - # step=:newton, μ₀=0.0, - # maxtime=500, linesearch=:backtrack, - # bool_trace=true, - # eigtol=1e-7, - # direction=:warm - # ) + rn1 = PFH(name=Symbol("iNewton-1e-7"))(; + x0=copy(x0), f=loss, g=g, hvp=hvpdiff, + maxiter=40, tol=ε, freq=1, + step=:newton, μ₀=0.0, + maxtime=500, linesearch=:backtrack, + bool_trace=true, + eigtol=1e-7, + direction=:warm + ) # rn2 = PFH(name=Symbol("iNewton-1e-8"))(; # x0=copy(x0), f=loss, g=g, hvp=hvpdiff, # maxiter=60, tol=ε, freq=1, diff --git a/test/convex/test_soft_maximum.jl b/test/convex/test_soft_maximum.jl index a01a752..d562aa3 100644 --- a/test/convex/test_soft_maximum.jl +++ b/test/convex/test_soft_maximum.jl @@ -43,7 +43,7 @@ using LIBSVMFileIO bool_plot = false bool_opt = true -# bool_setup = true +bool_setup = true if bool_setup Random.seed!(2) @@ -89,24 +89,24 @@ if bool_opt show_every=10, time_limit=500 ) - r_lbfgs = Optim.optimize( - loss, grad, x0, - LBFGS(; alphaguess=LineSearches.InitialStatic(), - linesearch=LineSearches.BackTracking()), options; - inplace=false - ) + # r_lbfgs = Optim.optimize( + # loss, grad, x0, + # LBFGS(; alphaguess=LineSearches.InitialStatic(), + # linesearch=LineSearches.BackTracking()), options; + # inplace=false + # ) r_newton = Optim.optimize( loss, grad, hess, x0, Newton(; alphaguess=LineSearches.InitialStatic() ), options; inplace=false ) - r = HSODM()(; - x0=copy(x0), f=loss, g=grad, H=hess, - maxiter=10000, tol=ε, freq=1, - direction=:warm, linesearch=:hagerzhang - ) - r.name = "Adaptive HSODM" + # r = HSODM()(; + # x0=copy(x0), f=loss, g=grad, H=hess, + # maxiter=10000, tol=ε, freq=1, + # direction=:warm, linesearch=:hagerzhang + # ) + # r.name = "Adaptive HSODM" rh = PFH()(; x0=copy(x0), f=loss, g=grad, H=hess, maxiter=10000, tol=ε, freq=1, diff --git a/test/cutest-benchmark/test_setup.jl b/test/cutest-benchmark/test_setup.jl index d00a42b..e9b70b6 100644 --- a/test/cutest-benchmark/test_setup.jl +++ b/test/cutest-benchmark/test_setup.jl @@ -33,9 +33,9 @@ filter_cutest_problem(nlp) = true # filter_optimization_method(k) = k ∈ [:LBFGS, :HSODM] # filter_optimization_method(k) = k ∈ [:DRSOM, :DRSOMHomo] # filter_optimization_method(k) = k == :HSODM -# filter_optimization_method(k) = k ∈ [:iUTRhvp] +filter_optimization_method(k) = k ∈ [:iUTRhvp] # filter_optimization_method(k) = k == :ARC -filter_optimization_method(k) = k == :TRST +# filter_optimization_method(k) = k == :TRST # filter_optimization_method(k) = k == :HSODMArC # filter_optimization_method(k) = k ∈ [:HSODM, :DRSOMHomo] # filter_optimization_method(k) = k ∈ [:DRSOM, :HSODM, :DRSOMHomo] @@ -45,8 +45,8 @@ filter_optimization_method(k) = k == :TRST # choose problem set # PROBLEMS = UNC_PROBLEMS_221104 # PROBLEMS = TEST -# PROBLEMS = UNC_PROBLEMS_4to200 -PROBLEMS = UNC_PROBLEMS_200to5000 +PROBLEMS = UNC_PROBLEMS_4to200 +# PROBLEMS = UNC_PROBLEMS_200to5000 if test_before_start ###################################################################### From d52a867c23610fadaac0a71d67924ccf94fc63e6 Mon Sep 17 00:00:00 2001 From: cz Date: Wed, 25 Oct 2023 15:58:04 +0800 Subject: [PATCH 04/17] fix bugs --- src/algorithms/utr.jl | 2 +- test/Project.toml | 2 ++ .../src/AdaptiveRegularization.jl | 4 ++-- test/tools.jl | 19 ++++++++++++++++++- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/algorithms/utr.jl b/src/algorithms/utr.jl index f0ff171..386ce6f 100644 --- a/src/algorithms/utr.jl +++ b/src/algorithms/utr.jl @@ -32,7 +32,7 @@ Base.@kwdef mutable struct UTRIteration{Tx,Tf,Tϕ,Tg,TH,Th} mainstrategy = :utr subpstrategy = :direct adaptiverule = :utr - initializerule = :mishchenko + initializerule = :undef LOG_SLOTS::String = UTR_LOG_SLOTS ALIAS::String = "UTR" DESC::String = "Universal Trust-Region Method" diff --git a/test/Project.toml b/test/Project.toml index 4d84a0a..8254d76 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,4 +1,5 @@ [deps] +ADNLPModels = "54578032-b7ea-4c30-94aa-7cbd1cce6c9a" AdaptiveRegularization = "79adfba0-1eab-50d9-8528-0589417a2bfe" ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" @@ -34,6 +35,7 @@ Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665" SpecialMatrices = "928aab9d-ef52-54ac-8ca1-acd7ca42c160" StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" +Stopping = "c4fe5a9e-e7fb-5c3d-89d5-7f405ab2214f" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" VectorizedReduction = "4ffe575c-65e5-43f4-bc05-e0b500dc3d2c" diff --git a/test/third-party/AdaptiveRegularization.jl/src/AdaptiveRegularization.jl b/test/third-party/AdaptiveRegularization.jl/src/AdaptiveRegularization.jl index ed1cf63..135af53 100755 --- a/test/third-party/AdaptiveRegularization.jl/src/AdaptiveRegularization.jl +++ b/test/third-party/AdaptiveRegularization.jl/src/AdaptiveRegularization.jl @@ -91,7 +91,7 @@ function TRARC end function TRARC(nlp::AbstractNLPModel{T, S}; kwargs...) where {T, S} nlpstop = NLPStopping(nlp; optimality_check = (pb, state) -> norm(state.gx), kwargs...) nlpstop = TRARC(nlpstop; kwargs...) - return stopping_to_stats(nlpstop) + return stopping_to_stats(nlpstop), nlpstop #stopping_to_stats(nlpstop) end for fun in union(keys(solvers_const), keys(solvers_nls_const)) @@ -112,7 +112,7 @@ for fun in union(keys(solvers_const), keys(solvers_nls_const)) function $fun(nlp::AbstractNLPModel{T, S}; kwargs...) where {T, S} nlpstop = NLPStopping(nlp; optimality_check = (pb, state) -> norm(state.gx), kwargs...) nlpstop = $fun(nlpstop; kwargs...) - return stopping_to_stats(nlpstop) + return stopping_to_stats(nlpstop), nlpstop end end @eval export $fun diff --git a/test/tools.jl b/test/tools.jl index c43448a..6b7326d 100644 --- a/test/tools.jl +++ b/test/tools.jl @@ -73,7 +73,24 @@ function arc_to_result(nlp, stats, name) return Result(name=name, iter=stats, state=state, k=stats.iter, trajectory=[]) end -export StateOptim, optim_to_result, arc_to_result + +function arc_stop_to_result(nlp, rr, name) + ts = rr.listofstates[1][1].current_time + traj = [] + for i in 1:rr.listofstates.i + x = rr.listofstates[i] + st = StateOptim(fx=x[1].fx, ϵ=x[1].gx |> norm, t=x[1].current_time - ts) + push!(traj, st) + end + traj[end].kf = neval_obj(nlp) + traj[end].kg = neval_grad(nlp) + traj[end].kH = neval_hess(nlp) + traj[end].kh = neval_hprod(nlp) + stats = AdaptiveRegularization.stopping_to_stats(rr) + return Result(name=name, iter=rr, state=stats, k=stats.iter, trajectory=traj) +end + +export StateOptim, optim_to_result, arc_to_result, arc_stop_to_result From 3ef72f56b44c4af49a8975e2fcc6bde2cbd33a93 Mon Sep 17 00:00:00 2001 From: cz Date: Sat, 2 Dec 2023 21:48:32 +0800 Subject: [PATCH 05/17] add randomized example. --- test/convex/test_linear_system.jl | 36 ++--- test/convex/test_linear_system_random.jl | 176 +++++++++++++++++++++++ 2 files changed, 195 insertions(+), 17 deletions(-) create mode 100644 test/convex/test_linear_system_random.jl diff --git a/test/convex/test_linear_system.jl b/test/convex/test_linear_system.jl index 18a0ee8..2fd8c4d 100644 --- a/test/convex/test_linear_system.jl +++ b/test/convex/test_linear_system.jl @@ -43,7 +43,8 @@ name = "a4a" # name = "covtype" # name = "news20" # name = "rcv1" -names = ["a4a"] #, "a9a", "w4a", "covtype", "rcv1", "news20"] +# names = ["a4a"] #, "a9a", "w4a", "covtype", "rcv1", "news20"] +names = ["covtype"] #, "a9a", "w4a", "covtype", "rcv1", "news20"] # names = ["a4a", "a9a", "w4a", "covtype", "rcv1"] @warn "news20 is very big..., consider run on a server" # use the data matrix of libsvm @@ -61,7 +62,7 @@ if name in ["covtype"] else end -γ = 1e-6 +γ = 1e-10 n = Xv[1, :] |> length Random.seed!(1) N = y |> length @@ -110,21 +111,22 @@ rl = KrylovKit.eigsolve( r["GHM-Lanczos"].normres += (Fc(ξ₁) - λ₁ .* ξ₁) |> norm r["GHM-Lanczos"].numops += rl[end].numops -rl = KrylovKit.linsolve( - Hc, -g, w₀, CG(; tol=ε, maxiter=max_iteration, verbosity=3); -) -r["Newton-CG"].normres += ((hvp(rl[1]) + g) |> norm) -r["Newton-CG"].numops += rl[end].numops -rl = KrylovKit.linsolve( - Hc, -g, w₀, GMRES(; tol=ε, maxiter=1, krylovdim=max_iteration, verbosity=3); -) -r["Newton-GMRES"].normres += ((hvp(rl[1]) + g) |> norm) -r["Newton-GMRES"].numops += rl[end].numops -rl = KrylovKit.linsolve( - Hc, -g, w₀, GMRES(; tol=ε, maxiter=4, krylovdim=div(max_iteration, 4), verbosity=3); -) -r["Newton-rGMRES"].normres += ((hvp(rl[1]) + g) |> norm) -r["Newton-rGMRES"].numops += rl[end].numops +if false: + rl = KrylovKit.linsolve( + Hc, -g, w₀, CG(; tol=ε, maxiter=max_iteration, verbosity=3); + ) + r["Newton-CG"].normres += ((hvp(rl[1]) + g) |> norm) + r["Newton-CG"].numops += rl[end].numops + rl = KrylovKit.linsolve( + Hc, -g, w₀, GMRES(; tol=ε, maxiter=1, krylovdim=max_iteration, verbosity=3); + ) + r["Newton-GMRES"].normres += ((hvp(rl[1]) + g) |> norm) + r["Newton-GMRES"].numops += rl[end].numops + rl = KrylovKit.linsolve( + Hc, -g, w₀, GMRES(; tol=ε, maxiter=4, krylovdim=div(max_iteration, 4), verbosity=3); + ) + r["Newton-rGMRES"].normres += ((hvp(rl[1]) + g) |> norm) + r["Newton-rGMRES"].numops += rl[end].numops ################################ # ALTERNATIVELY, USE Krylov.jl diff --git a/test/convex/test_linear_system_random.jl b/test/convex/test_linear_system_random.jl new file mode 100644 index 0000000..8ff560c --- /dev/null +++ b/test/convex/test_linear_system_random.jl @@ -0,0 +1,176 @@ +@doc raw""" + We test the eigensolve is stable at an ill-conditioned matrix from LIBSVM +""" + + + +include("../lp.jl") +include("../tools.jl") + +using ArgParse +using DRSOM +using Distributions +using LineSearches +using Optim +using ProximalOperators +using ProximalAlgorithms +using Random +using Plots +using Printf +using LazyStack +using KrylovKit +using HTTP +using LaTeXStrings +using LinearAlgebra +using ForwardDiff +using Statistics +using LinearOperators +using Optim +using SparseArrays +using DRSOM +using .LP +using LoopVectorization +using LIBSVMFileIO +using DataFrames +using CSV +Base.@kwdef mutable struct KrylovInfo + normres::Float64 + numops::Float64 +end +table = [] + +bool_gen = true +bool_solve = true + +if bool_gen + f1(A, d=2) = sqrt.(sum(abs2.(A), dims=d)) + Random.seed!(1) + m = 6000 + n = 10000 + nnz = 0.3 + + @info "data reading start" m n nnz + name = "random" + A = sprand(Float64, m, n, nnz) + wₛ = rand(Float64, n) + wₛ ./= norm(wₛ) + y = A * wₛ + ϵ(w) = A*w - y + f(w) = 0.5*sum(abs2,ϵ(w)) + fₛ = f(wₛ) + + g(w) = ForwardDiff.gradient(f, w) + + γ = 0.0 # 1e-10 + Random.seed!(1) + N = y |> length + # Q = A' * A + # function gfs(w) + # return Q * w - A' * y + # end + r = Dict() + # + r["GHM-Lanczos"] = KrylovInfo(normres=0.0, numops=0) + r["Newton-CG"] = KrylovInfo(normres=0.0, numops=0) + r["Newton-GMRES"] = KrylovInfo(normres=0.0, numops=0) + r["Newton-rGMRES"] = KrylovInfo(normres=0.0, numops=0) + w₀ = rand(Float64, n) + w₀ ./= norm(w₀) + r₀ = ϵ(w₀) + δ₀ = norm(r₀)^2 + g₀ = A'r₀ + + Fw(w) = [ + (A' * ϵ(w[1:end-1]) + A'y + g₀ * w[end]); + (w[1:end-1]' * g₀ + δ₀ * w[end]) + ] + Fc = DRSOM.Counting(Fw) + + +end +Fc([w₀; 1]) +@info "data reading finished" m n nnz +if bool_solve + + max_iteration = 15 + ε = 1e-7 + + ts = time() + rl = KrylovKit.eigsolve( + Fc, [w₀; 1], 1, :SR, + Lanczos( + tol=ε, + maxiter=100, + verbosity=2, + eager=true + ); + ) + λ₁ = rl[1] + ξ₁ = rl[2][1] + + x = ξ₁[1:n] / ξ₁[n+1] + + r["GHM-Lanczos"].normres += norm(A * x + r₀) + r["GHM-Lanczos"].numops += rl[end].numops + + te = time() + + @info "result" r["GHM-Lanczos"] (te - ts) + + if false + rl = KrylovKit.linsolve( + Hc, -g, w₀, CG(; tol=ε, maxiter=max_iteration, verbosity=3); + ) + r["Newton-CG"].normres += ((hvp(rl[1]) + g) |> norm) + r["Newton-CG"].numops += rl[end].numops + rl = KrylovKit.linsolve( + Hc, -g, w₀, GMRES(; tol=ε, maxiter=1, krylovdim=max_iteration, verbosity=1); + ) + r["Newton-GMRES"].normres += ((hvp(rl[1]) + g) |> norm) + r["Newton-GMRES"].numops += rl[end].numops + rl = KrylovKit.linsolve( + Hc, -g, w₀, GMRES(; tol=ε, maxiter=4, krylovdim=div(max_iteration, 4), verbosity=3); + ) + r["Newton-rGMRES"].normres += ((hvp(rl[1]) + g) |> norm) + r["Newton-rGMRES"].numops += rl[end].numops + end + ################################ + # ALTERNATIVELY, USE Krylov.jl + ################################ + # opH = LinearOperator( + # Float64, n, n, true, true, + # (y, v) -> hvp!(y, v) + # ) + # d, __unused_info = cg( + # opH, -g; verbose=1, itmax=200 + # ) + + # end + for (k, v) in r + push!(table, [name, n, k, v.numops, v.normres]) + end + # end + + tmat = hcat(table...) + df = DataFrame( + name=tmat[1, :], + method=tmat[3, :], + k=string.(tmat[4, :]), + ϵ=tmat[5, :] + ) + + CSV.write("/tmp/linsys1.csv", df) + + """ + import pandas as pd + df = pd.read_csv("/tmp/linsys.csv") + print( + df.set_index(["name", "method"] + ).to_latex( + multirow=True, + longtable=True, + float_format="%.1e" + ) + ) + """ +end \ No newline at end of file From 0e23f162d088c7468b3130e8a6fd4c0b71c0e29b Mon Sep 17 00:00:00 2001 From: cz Date: Thu, 28 Dec 2023 15:17:25 +0800 Subject: [PATCH 06/17] summarizing changes --- src/algorithms/hsodm.jl | 220 ++++++++++------------ src/utilities/homogeneous.jl | 5 +- test/cutest-benchmark/hsodm_paper_test.jl | 4 +- test/cutest-benchmark/test_setup.jl | 91 ++++----- 4 files changed, 156 insertions(+), 164 deletions(-) diff --git a/src/algorithms/hsodm.jl b/src/algorithms/hsodm.jl index 12818b1..89662a8 100644 --- a/src/algorithms/hsodm.jl +++ b/src/algorithms/hsodm.jl @@ -18,7 +18,7 @@ Base.@kwdef mutable struct HSODMIteration{Tx,Tf,Tϕ,Tg,TH,Th} g::Tg = nothing # gradient function H::TH = nothing # hessian function hvp::Th = nothing # hessian-vector product - fvp::Union{Function,Nothing} = nothing # hessian-vector product of Fk (defined from hvp) + fvp::Union{Function,Nothing} = nothing # hessian-vector product of Fₖ (defined from hvp) x0::Tx # initial point t::Dates.DateTime = Dates.now() adaptive_param = AR() # todo @@ -42,31 +42,34 @@ Base.@kwdef mutable struct HSODMState{R,Tx} x::Tx # iterate fx::R # new value f at x: x(k) fz::R # old value f at z: x(k-1) - ∇f::Tx # gradient of f at x - ∇fz::Tx # gradient of f at z - ∇fb::Tx # buffer of hvps y::Tx # forward point z::Tx # previous point d::Tx # momentum/fixed-point diff at iterate (= x - z) - Δ::R # trust-region radius - dq::R # decrease of estimated quadratic model - df::R # decrease of the real function value - ρ::R # trs descrease ratio: ρ = df/dq - ϵ::R # eps 2: residual for gradient + ξ::Tx # eigenvector + ∇f::Tx # gradient of f at x + ∇fz::Tx # gradient of f at z + ∇fb::Tx # buffer of hvps + Δ::R = 1e10 # "trust-region" radius + dq::R = 0.0 # decrease of estimated quadratic model + df::R = 0.0 # decrease of the real function value + ρ::R = 0.0 # trs descrease ratio: ρ = df/dq + ϵ::R = 0.0 # eps: residual for gradient α::R = 1e-5 # step size γ::R = 1e-5 # trust-region style parameter γ σ::R = 1e3 # adaptive arc style parameter σ - kᵥ::Int = 1 # krylov iterations - kₜ::Int = 1 # inner iterations t::R = 0.0 # running time λ₁::Float64 = 0.0 # smallest curvature if available - δ::Float64 = 1e-3 # smallest curvature if available - ξ::Tx # eigenvector - kf::Int = 0 # function evaluations - kg::Int = 0 # gradient evaluations - kH::Int = 0 # hessian evaluations - kh::Int = 0 # hvp evaluations - k₂::Int = 0 # 2 oracle evaluations + δ::Float64 = 1e-3 # smallest curvature if available + ######################################################### + kₜ::Int = 1 # inner iterations + kᵥ::Int = 1 # krylov iterations + kf::Int = 0 # function evaluations + kg::Int = 0 # gradient evaluations + kH::Int = 0 # hessian evaluations + kh::Int = 0 # hvp evaluations + k₂::Int = 0 # 2nd oracle evaluations + kₛ::Int = 0 # stagnation counter + ######################################################### end @@ -75,102 +78,34 @@ function Base.iterate(iter::HSODMIteration) iter.t = Dates.now() z = copy(iter.x0) fz = iter.f(z) - grad_f_x = similar(z) grad_f_x = iter.g(z) n = length(z) Hv = similar(grad_f_x) - if iter.hvp === nothing - H = iter.H(z) - # construct homogeneous system - B = Symmetric([H grad_f_x; SparseArrays.spzeros(n)' -1e-3]) - vals, vecs, info = KrylovKit.eigsolve( - B, n + 1, 1, :SR, Float64; - issymmetric=true, tol=iter.eigtol - ) - else - fvp(x, g, v, Hv, d) = ( - iter.hvp(x, v[1:end-1], Hv); - [ - Hv + v[end] * g - g'v[1:end-1] + d * v[end] - ] - ) - iter.fvp = fvp - ff(v) = iter.fvp(z, grad_f_x, v, Hv, 0) - vals, vecs, info = KrylovKit.eigsolve( - ff, n + 1, 1, :SR, Float64; - issymmetric=true, tol=iter.eigtol - ) - end - kᵥ = info.numops - λ₁ = vals[1] - ξ = vecs[1] - v = reshape(ξ[1:end-1], n) - t₀ = ξ[end] - (abs(t₀) > 1e-3) && (v = v / t₀) - - vn = norm(v) - if iter.hvp === nothing - vHv = (v'*H*v/2)[] - else - vHv = v' * Hv - end - vg = (v'*grad_f_x)[] - bool_reverse_v = vg > 0 - # reverse this v if g'v > 0 - v = (-1)^bool_reverse_v * v - vg = (-1)^bool_reverse_v * vg - α = 0.0 - # now use a LS to solve (state.α) - if iter.linesearch == :hagerzhang - # use Hager-Zhang line-search algorithm - α, fx, kₜ = HagerZhangLineSearch(iter, grad_f_x, fz, z, v) - end - if (α == 0) || (iter.linesearch == :trustregion) - α, fx, kₜ = TRStyleLineSearch(iter, z, v, vHv, vg, 1.0) - end - if (α == 0) || (iter.linesearch == :backtrack) - # use Hager-Zhang line-search algorithm - α, fx, kₜ = BacktrackLineSearch(iter, grad_f_x, fz, z, v) - end - if (α == 0) || (iter.linesearch == :none) - α = 1.0 - fx = iter.f(z + v * α) - kₜ = 1 - else - end - y = z + α .* v - fx = iter.f(y) - dq = -α^2 * vHv / 2 - α * vg - df = fz - fx - ro = df / dq - Δ = vn * α - t = (Dates.now() - iter.t).value / 1e3 - d = y - z state = HSODMState( - x=y, - y=y, + x=z, + y=z, z=z, - fx=fx, + d=zeros(n), + fx=fz, fz=fz, ∇f=grad_f_x, ∇fz=z, ∇fb=Hv, - α=α, - d=d, - Δ=Δ, - dq=dq, - df=df, - ρ=ro, - ϵ=norm(grad_f_x, 2), - γ=1e-6, - kᵥ=kᵥ, - kₜ=kₜ, - t=t, - δ=0.0, - ξ=ones(length(z) + 1), - λ₁=λ₁ + ξ=ones(Float64, n + 1), + ϵ=grad_f_x |> norm, ) + if iter.hvp === nothing + return state, state + end + fvp(x, g, v, Hv, d) = ( + iter.hvp(x, v[1:end-1], Hv); + [ + Hv + v[end] * g + g'v[1:end-1] + d * v[end] + ] + ) + iter.fvp = fvp + ff(v) = iter.fvp(z, grad_f_x, v, Hv, 0) return state, state end @@ -184,18 +119,16 @@ function Base.iterate(iter::HSODMIteration, state::HSODMState{R,Tx}) where {R,Tx state.∇f = iter.g(state.x) n = state.∇f |> length - ng = norm(state.∇f) + gₙ = norm(state.∇f) + # construct homogeneous system and solve if iter.hvp === nothing H = iter.H(state.x) - # construct homogeneous system B = Symmetric([H state.∇f; SparseArrays.spzeros(n)' state.δ]) - kᵥ, k₂, v, vn, vg, vHv = AdaptiveHomogeneousSubproblem( B, iter, state, iter.adaptive_param; verbose=iter.verbose > 1 ) else - n = length(z) ff(v) = iter.fvp(state.x, state.∇f, v, state.∇fb, state.δ) kᵥ, k₂, v, vn, vg, vHv = AdaptiveHomogeneousSubproblem( @@ -230,7 +163,19 @@ function Base.iterate(iter::HSODMIteration, state::HSODMState{R,Tx}) where {R,Tx ) return state, state end + ################################################################# # summarize + ################################################################# + if hsodm_stucking_criterion(iter, state) + state.kₛ += 1 + else + state.kₛ = 0 + end + if state.kₛ > 10 + state.status = false + else + state.status = true + end state.Δ = state.α * vn x = y = state.z + v * state.α dq = -state.α^2 * vHv / 2 - state.α * vg @@ -249,20 +194,63 @@ function Base.iterate(iter::HSODMIteration, state::HSODMState{R,Tx}) where {R,Tx state.ϵ = norm(state.∇f) state.t = (Dates.now() - iter.t).value / 1e3 - if ng > 5e-3 - state.δ = 1e-1 - end - if state.λ₁ < 0 - state.δ = state.δ + 8e-3 - else - state.δ = state.δ - 1e-2 + ################################################################# + # adjust δ, + # if it is not adjusted in the ArC style + ################################################################# + # if ng > 5e-3 + # state.δ = 1e-1 + # end + # if state.λ₁ < 0 + # state.δ = state.δ + 8e-3 + # else + # state.δ = state.δ - 1e-2 + # end + if iter.adaptive == :none + state.δ = -1e-3 + elseif iter.adaptive == :mishchenko + if iter.hvp === nothing + σ1 = hsodm_rules_mishchenko(iter, state, gₙ, H) + else + σ1 = hsodm_rules_mishchenko(iter, state, gₙ) + end + state.δ = -state.ϵ / σ1 end counting(iter, state) - state.status = true + return state, state end +@doc """ + implement the strategy of Mishchenko [Algorithm 2.3, AdaN+](SIOPT, 2023) + this is always accepting method +""" +function hsodm_rules_mishchenko(iter, state, args...) + σ = (iter.g(state.z) |> norm) / abs(state.δ) + dx = state.d + gx = state.∇fz + gy = state.∇f + if iter.hvp === nothing + gₙ, Hx, _... = args + M = (norm(gy - gx - Hx * dx)) / norm(dx)^2 + else + iter.hvp(z, dx, state.∇fb) + M = (norm(gy - gx - state.∇fb)) / norm(dx)^2 + end + σ1 = max( + σ / √2, + √M + ) + @debug "details:" norm(dx) norm(gy - gx) σ σ1 + return σ1 +end + + +function hsodm_stucking_criterion(iter::HSODMIteration, state::HSODMState) + return false +end + hsodm_stopping_criterion(tol, state::HSODMState) = (state.Δ <= 1e-20) || (state.ϵ <= tol) && abs(state.fz - state.fx) <= tol diff --git a/src/utilities/homogeneous.jl b/src/utilities/homogeneous.jl index bbd5c2e..4ea052b 100644 --- a/src/utilities/homogeneous.jl +++ b/src/utilities/homogeneous.jl @@ -47,7 +47,7 @@ function AdaptiveHomogeneousSubproblem(B, iter, state, adaptive_param::AR; verbo kᵥ, λ₁, ξ, t₀, v, vn, vg, vHv = homogeneous_eigenvalue(B, iter, state) # non-adaptive mode - if iter.adaptive === :none + if iter.adaptive != :arc state.λ₁ = λ₁ state.ξ = ξ return kᵥ, k₂, v, vn, vg, vHv @@ -139,7 +139,6 @@ function AdaptiveHomogeneousSubproblem(B, iter, state, adaptive_param::AR; verbo end -# adaptive GHMs wrapping ArC style function AdaptiveHomogeneousSubproblem(f::Function, iter, state, adaptive_param::AR; verbose::Bool=false) kᵥ = 1 # krylov iterations k₂ = 1 # second-order eigenvalue calls @@ -147,7 +146,7 @@ function AdaptiveHomogeneousSubproblem(f::Function, iter, state, adaptive_param: kᵥ, λ₁, ξ, t₀, v, vn, vg, vHv = homogeneous_eigenvalue(f, iter, state) # non-adaptive mode - if iter.adaptive === :none + if iter.adaptive != :arc state.λ₁ = λ₁ # printstyled(ξ) state.ξ = ξ[:, 1] diff --git a/test/cutest-benchmark/hsodm_paper_test.jl b/test/cutest-benchmark/hsodm_paper_test.jl index 491f2d3..8403691 100644 --- a/test/cutest-benchmark/hsodm_paper_test.jl +++ b/test/cutest-benchmark/hsodm_paper_test.jl @@ -11,7 +11,9 @@ UNC_PROBLEMS_4to200 = Dict( ) UNC_PROBLEMS_200to5000 = Dict("ARWHEAD" => ["N=1000"], "BDQRTIC" => ["N=1000"], "BOX" => ["N=1000"], "BOXPOWER" => ["N=1000"], "BROWNAL" => ["N=1000"], "BROYDN3DLS" => ["KAPPA1=2.0,KAPPA2=1.0,N=1000"], "BROYDN7D" => ["N/2=250"], "BROYDNBDLS" => ["KAPPA1=2.0,KAPPA2=5.0,KAPPA3=1.0,LB=5,N=1000,UB=1"], "BRYBND" => ["KAPPA1=2.0,KAPPA2=5.0,KAPPA3=1.0,LB=5,N=1000,UB=1"], "CHAINWOO" => ["NS=499"], "COSINE" => ["N=1000"], "CRAGGLVY" => ["M=499"], "CURLY10" => ["N=1000"], "CURLY20" => ["N=1000"], "CURLY30" => ["N=1000"], "DIXMAANA" => ["M=1000"], "DIXMAANB" => ["M=1000"], "DIXMAANC" => ["M=1000"], "DIXMAAND" => ["M=1000"], "DIXMAANE" => ["M=1000"], "DIXMAANF" => ["M=1000"], "DIXMAANG" => ["M=1000"], "DIXMAANH" => ["M=1000"], "DIXMAANI" => ["M=1000"], "DIXMAANJ" => ["M=1000"], "DIXMAANK" => ["M=1000"], "DIXMAANL" => ["M=1000"], "DIXMAANM" => ["M=1000"], "DIXMAANN" => ["M=1000"], "DIXMAANO" => ["M=1000"], "DIXMAANP" => ["M=1000"], "DIXON3DQ" => ["N=1000"], "DQDRTIC" => ["N=1000"], "DQRTIC" => ["N=1000"], "EDENSCH" => ["N=2000"], "EIGENALS" => ["N=50"], "EIGENBLS" => ["N=50"], "EIGENCLS" => ["M=25"], "ENGVAL1" => ["N=1000"], "EXTROSNB" => ["N=1000"], "FLETBV3M" => ["KAPPA=0.0,N=1000"], "FLETCBV2" => ["KAPPA=0.0,N=1000"], "FLETCBV3" => ["KAPPA=0.0,N=1000"], "FLETCHBV" => ["KAPPA=0.0,N=1000"], "FLETCHCR" => ["N=1000"], "FMINSRF2" => ["P=31"], "FMINSURF" => ["P=31"], "FREUROTH" => ["N=1000"], "GENHUMPS" => ["N=1000,ZETA=20.0"], "GENROSE" => ["N=500"], "INDEF" => ["ALPHA=0.5,N=1000"], "INDEFM" => ["ALPHA=0.5,N=1000"], "INTEQNELS" => ["N=500"], "JIMACK" => ["M=2,N=12"], "LIARWHD" => ["N=1000"], "MODBEALE" => ["ALPHA=50.0,N/2=1000"], "MOREBV" => ["N=1000"], "MSQRTALS" => ["P=70"], "MSQRTBLS" => ["P=70"], "NCB20" => ["N=1000"], "NCB20B" => ["N=1000"], "NONCVXU2" => ["N=1000"], "NONCVXUN" => ["N=1000"], "NONDIA" => ["N=1000"], "NONDQUAR" => ["N=1000"], "NONMSQRT" => ["P=70"], "OSCIGRAD" => ["N=1000,RHO=500.0"], "OSCIPATH" => ["N=500,RHO=500.0"], "PENALTY1" => ["N=1000"], "PENALTY2" => ["N=1000"], "POWELLSG" => ["N=1000"], "POWER" => ["N=1000"], "QUARTC" => ["N=1000"], "SBRYBND" => ["N=1000"], "SCHMVETT" => ["N=1000"], "SCOSINE" => ["N=1000"], "SCURLY10" => ["N=1000"], "SCURLY20" => ["N=1000"], "SCURLY30" => ["N=1000"], "SENSORS" => ["N=1000"], "SINQUAD" => ["N=1000"], "SPARSINE" => ["N=1000"], "SPARSQUR" => ["N=1000"], "SPMSRTLS" => ["M=334"], "SROSENBR" => ["N/2=250"], "SSBRYBND" => ["N=1000"], "SSCOSINE" => ["N=1000"], "TESTQUAD" => ["N=1000"], "TOINTGSS" => ["N=1000"], "TQUARTIC" => ["N=1000"], "TRIDIA" => ["ALPHA=2.0,BETA=1.0,DELTA=1.0,GAMMA=1.0,N=1000"], "VAREIGVL" => ["M=4,N=499,Q=1.5"], "WOODS" => ["NS=1000"], "YATP1LS" => ["N=50"], "YATP2LS" => ["N=50"]) - +UNC_PROBLEMS_COMB = [ + UNC_PROBLEMS_4to200..., UNC_PROBLEMS_200to5000... +] UNC_PROBLEMS_221104 = Dict( "ARGLINA" => [ diff --git a/test/cutest-benchmark/test_setup.jl b/test/cutest-benchmark/test_setup.jl index e9b70b6..99fe890 100644 --- a/test/cutest-benchmark/test_setup.jl +++ b/test/cutest-benchmark/test_setup.jl @@ -2,7 +2,7 @@ include("../tools.jl") include("./hsodm_paper_test.jl") -log_freq = 1 +log_freq = 200 precision = tol_grad = 1e-5 max_iter = 20000 max_time = 200.0 @@ -32,8 +32,10 @@ filter_cutest_problem(nlp) = true # filter_optimization_method(k) = k == :LBFGS # filter_optimization_method(k) = k ∈ [:LBFGS, :HSODM] # filter_optimization_method(k) = k ∈ [:DRSOM, :DRSOMHomo] -# filter_optimization_method(k) = k == :HSODM -filter_optimization_method(k) = k ∈ [:iUTRhvp] +filter_optimization_method(k) = k == :HSODM +# filter_optimization_method(k) = k == :HSODMhvp +# filter_optimization_method(k) = k ∈ [:HSODMhvp, :HSODM] +# filter_optimization_method(k) = k ∈ [:iUTRhvp] # filter_optimization_method(k) = k == :ARC # filter_optimization_method(k) = k == :TRST # filter_optimization_method(k) = k == :HSODMArC @@ -45,8 +47,9 @@ filter_optimization_method(k) = k ∈ [:iUTRhvp] # choose problem set # PROBLEMS = UNC_PROBLEMS_221104 # PROBLEMS = TEST -PROBLEMS = UNC_PROBLEMS_4to200 +# PROBLEMS = UNC_PROBLEMS_4to200 # PROBLEMS = UNC_PROBLEMS_200to5000 +PROBLEMS = UNC_PROBLEMS_COMB if test_before_start ###################################################################### @@ -62,26 +65,26 @@ if test_before_start hvp(x, v, Hv) = NLPModels.hprod!(nlp, x, v, Hv) - @testset "DRSOM" begin - options_drsom = Dict( - :maxiter => max_iter, - :maxtime => max_time, - :tol => 1e-5, - :freq => log_freq - ) - r = wrapper_drsom(x0, loss, g, H, options_drsom) - @test r.state.ϵ < 1e-4 - end - @testset "DRSOM-d" begin - options_drsom = Dict( - :maxiter => max_iter, - :maxtime => max_time, - :tol => 1e-5, - :freq => log_freq - ) - r = wrapper_drsomd(x0, loss, g, H, options_drsom; hvp=hvp) - @test r.state.ϵ < 1e-4 - end + # @testset "DRSOM" begin + # options_drsom = Dict( + # :maxiter => max_iter, + # :maxtime => max_time, + # :tol => 1e-5, + # :freq => log_freq + # ) + # r = wrapper_drsom(x0, loss, g, H, options_drsom) + # @test r.state.ϵ < 1e-4 + # end + # @testset "DRSOM-d" begin + # options_drsom = Dict( + # :maxiter => max_iter, + # :maxtime => max_time, + # :tol => 1e-5, + # :freq => log_freq + # ) + # r = wrapper_drsomd(x0, loss, g, H, options_drsom; hvp=hvp) + # @test r.state.ϵ < 1e-4 + # end @testset "HSODM" begin options_drsom = Dict( :maxiter => max_iter, @@ -92,26 +95,26 @@ if test_before_start r = wrapper_hsodm(x0, loss, g, H, options_drsom) @test r.state.ϵ < 1e-4 end - @testset "UTR" begin - options_drsom = Dict( - :maxiter => max_iter, - :maxtime => max_time, - :tol => 1e-5, - :freq => log_freq - ) - r = wrapper_utr(x0, loss, g, H, options_drsom) - @test r.state.ϵ < 1e-4 - end - @testset "UTR" begin - options_drsom = Dict( - :maxiter => max_iter, - :maxtime => max_time, - :tol => 1e-5, - :freq => log_freq - ) - r = wrapper_iutr_hvp(x0, loss, g, H, options_drsom; hvp=hvp) - @test r.state.ϵ < 1e-4 - end + # @testset "UTR" begin + # options_drsom = Dict( + # :maxiter => max_iter, + # :maxtime => max_time, + # :tol => 1e-5, + # :freq => log_freq + # ) + # r = wrapper_utr(x0, loss, g, H, options_drsom) + # @test r.state.ϵ < 1e-4 + # end + # @testset "UTR" begin + # options_drsom = Dict( + # :maxiter => max_iter, + # :maxtime => max_time, + # :tol => 1e-5, + # :freq => log_freq + # ) + # r = wrapper_iutr_hvp(x0, loss, g, H, options_drsom; hvp=hvp) + # @test r.state.ϵ < 1e-4 + # end finalize(nlp) end end \ No newline at end of file From 42c5f809333366293c12f73c1c5a4366317efafb Mon Sep 17 00:00:00 2001 From: cz Date: Thu, 28 Dec 2023 20:33:37 +0800 Subject: [PATCH 07/17] update strategies --- src/algorithms/hsodm.jl | 4 +- src/utilities/homogeneous.jl | 2 +- test/cutest-benchmark/hsodm_paper_test.jl | 218 ++++++++++++++++++++- test/cutest-benchmark/test_cutest_batch.jl | 2 +- 4 files changed, 212 insertions(+), 14 deletions(-) diff --git a/src/algorithms/hsodm.jl b/src/algorithms/hsodm.jl index 89662a8..e5a2ad1 100644 --- a/src/algorithms/hsodm.jl +++ b/src/algorithms/hsodm.jl @@ -207,7 +207,7 @@ function Base.iterate(iter::HSODMIteration, state::HSODMState{R,Tx}) where {R,Tx # state.δ = state.δ - 1e-2 # end if iter.adaptive == :none - state.δ = -1e-3 + state.δ = 0 elseif iter.adaptive == :mishchenko if iter.hvp === nothing σ1 = hsodm_rules_mishchenko(iter, state, gₙ, H) @@ -235,7 +235,7 @@ function hsodm_rules_mishchenko(iter, state, args...) gₙ, Hx, _... = args M = (norm(gy - gx - Hx * dx)) / norm(dx)^2 else - iter.hvp(z, dx, state.∇fb) + iter.hvp(state.z, dx, state.∇fb) M = (norm(gy - gx - state.∇fb)) / norm(dx)^2 end σ1 = max( diff --git a/src/utilities/homogeneous.jl b/src/utilities/homogeneous.jl index 4ea052b..613d966 100644 --- a/src/utilities/homogeneous.jl +++ b/src/utilities/homogeneous.jl @@ -254,7 +254,7 @@ end homogeneous_eigenvalue = Counting(_inner_homogeneous_eigenvalue) function eigenvalue( - B::Symmetric{Float64,F}, iter, state; bg=:krylov#:arnoldi + B::Symmetric{Float64,F}, iter, state; bg=:arnoldi ) where {F<:Union{SparseMatrixCSC{Float64,Int64},Matrix{Float64}}} n = length(state.x) diff --git a/test/cutest-benchmark/hsodm_paper_test.jl b/test/cutest-benchmark/hsodm_paper_test.jl index 8403691..c819ae6 100644 --- a/test/cutest-benchmark/hsodm_paper_test.jl +++ b/test/cutest-benchmark/hsodm_paper_test.jl @@ -1,18 +1,216 @@ # unconstrained problems 2022/11/04 # I select a set of unconstrained problems -TEST = Dict( - "EXTROSNB" => ["N=100"] -) -UNC_PROBLEMS_4to200 = Dict( - "ARGLINA" => ["M=200,N=200"], "ARGLINB" => ["M=200,N=200"], - "ARGLINC" => ["M=200,N=200"], "ARGTRIGLS" => ["N=200"], - "ARWHEAD" => ["N=100"], "BDQRTIC" => ["N=100"], "BOX" => ["N=10"], "BOXPOWER" => ["N=10"], "BROWNAL" => ["N=200"], "BROYDN3DLS" => ["KAPPA1=2.0,KAPPA2=1.0,N=50"], "BROYDN7D" => ["N/2=25"], "BROYDNBDLS" => ["KAPPA1=2.0,KAPPA2=5.0,KAPPA3=1.0,LB=5,N=50,UB=1"], "BRYBND" => ["KAPPA1=2.0,KAPPA2=5.0,KAPPA3=1.0,LB=5,N=50,UB=1"], "CHAINWOO" => ["NS=1"], "CHNROSNB" => ["N=25"], "CHNRSNBM" => ["N=25"], "COSINE" => ["N=100"], "CRAGGLVY" => ["M=24"], "CURLY10" => ["N=100"], "CURLY20" => ["N=100"], "DIXMAANA" => ["M=30"], "DIXMAANB" => ["M=30"], "DIXMAANC" => ["M=30"], "DIXMAAND" => ["M=30"], "DIXMAANE" => ["M=30"], "DIXMAANF" => ["M=30"], "DIXMAANG" => ["M=30"], "DIXMAANH" => ["M=30"], "DIXMAANI" => ["M=30"], "DIXMAANJ" => ["M=30"], "DIXMAANK" => ["M=30"], "DIXMAANL" => ["M=30"], "DIXMAANM" => ["M=30"], "DIXMAANN" => ["M=30"], "DIXMAANO" => ["M=30"], "DIXMAANP" => ["M=30"], "DIXON3DQ" => ["N=100"], "DQDRTIC" => ["N=50"], "DQRTIC" => ["N=50"], "EDENSCH" => ["N=36"], "EIGENALS" => ["N=2"], "EIGENBLS" => ["N=2"], "EIGENCLS" => ["M=2"], "ENGVAL1" => ["N=50"], "ERRINROS" => ["N=25"], "ERRINRSM" => ["N=25"], "EXTROSNB" => ["N=100"], "FLETBV3M" => ["KAPPA=0.0,N=10"], "FLETCBV2" => ["KAPPA=0.0,N=10"], "FLETCBV3" => ["KAPPA=0.0,N=10"], "FLETCHBV" => ["KAPPA=0.0,N=10"], "FLETCHCR" => ["N=100"], "FMINSRF2" => ["P=4"], "FMINSURF" => ["P=4"], "FREUROTH" => ["N=50"], "GENHUMPS" => ["N=10,ZETA=20.0"], "GENROSE" => ["N=100"], "HILBERTA" => ["D=0.0,N=6"], "HILBERTB" => ["D=5.0,N=5"], "INDEF" => ["ALPHA=0.5,N=50"], "INDEFM" => ["ALPHA=0.5,N=50"], "INTEQNELS" => ["N=100"], "JIMACK" => ["M=2,N=2"], "LIARWHD" => ["N=36"], "MANCINO" => ["ALPHA=5,BETA=14.0,GAMMA=3,N=50"], "MODBEALE" => ["ALPHA=50.0,N/2=5"], "MOREBV" => ["N=50"], "MSQRTALS" => ["P=7"], "MSQRTBLS" => ["P=7"], "NCB20" => ["N=100"], "NCB20B" => ["N=180"], "NONCVXU2" => ["N=10"], "NONCVXUN" => ["N=10"], "NONDIA" => ["N=90"], "NONDQUAR" => ["N=100"], "NONMSQRT" => ["P=7"], "OSCIGRAD" => ["N=15,RHO=500.0"], "OSCIPATH" => ["N=25,RHO=500.0"], "PENALTY1" => ["N=50"], "PENALTY2" => ["N=50"], "PENALTY3" => ["N/2=25"], "POWELLSG" => ["N=60"], "POWER" => ["N=50"], "QUARTC" => ["N=100"], "SBRYBND" => ["N=50"], "SCHMVETT" => ["N=10"], "SCOSINE" => ["N=10"], "SCURLY10" => ["N=10"], "SENSORS" => ["N=10"], "SINQUAD" => ["N=50"], "SPARSINE" => ["N=50"], "SPARSQUR" => ["N=50"], "SPMSRTLS" => ["M=34"], "SROSENBR" => ["N/2=25"], "SSBRYBND" => ["N=50"], "SSCOSINE" => ["N=10"], "TOINTGSS" => ["N=50"], "TQUARTIC" => ["N=50"], "TRIDIA" => ["ALPHA=2.0,BETA=1.0,DELTA=1.0,GAMMA=1.0,N=50"], "VARDIM" => ["N=200"], "VAREIGVL" => ["M=4,N=99,Q=1.5"], "WATSON" => ["N=12"], "WOODS" => ["NS=1"], "YATP1LS" => ["N=10"], "YATP2LS" => ["N=2"] -) -UNC_PROBLEMS_200to5000 = Dict("ARWHEAD" => ["N=1000"], "BDQRTIC" => ["N=1000"], "BOX" => ["N=1000"], "BOXPOWER" => ["N=1000"], "BROWNAL" => ["N=1000"], "BROYDN3DLS" => ["KAPPA1=2.0,KAPPA2=1.0,N=1000"], "BROYDN7D" => ["N/2=250"], "BROYDNBDLS" => ["KAPPA1=2.0,KAPPA2=5.0,KAPPA3=1.0,LB=5,N=1000,UB=1"], "BRYBND" => ["KAPPA1=2.0,KAPPA2=5.0,KAPPA3=1.0,LB=5,N=1000,UB=1"], "CHAINWOO" => ["NS=499"], "COSINE" => ["N=1000"], "CRAGGLVY" => ["M=499"], "CURLY10" => ["N=1000"], "CURLY20" => ["N=1000"], "CURLY30" => ["N=1000"], "DIXMAANA" => ["M=1000"], "DIXMAANB" => ["M=1000"], "DIXMAANC" => ["M=1000"], "DIXMAAND" => ["M=1000"], "DIXMAANE" => ["M=1000"], "DIXMAANF" => ["M=1000"], "DIXMAANG" => ["M=1000"], "DIXMAANH" => ["M=1000"], "DIXMAANI" => ["M=1000"], "DIXMAANJ" => ["M=1000"], "DIXMAANK" => ["M=1000"], "DIXMAANL" => ["M=1000"], "DIXMAANM" => ["M=1000"], "DIXMAANN" => ["M=1000"], "DIXMAANO" => ["M=1000"], "DIXMAANP" => ["M=1000"], "DIXON3DQ" => ["N=1000"], "DQDRTIC" => ["N=1000"], "DQRTIC" => ["N=1000"], "EDENSCH" => ["N=2000"], "EIGENALS" => ["N=50"], "EIGENBLS" => ["N=50"], "EIGENCLS" => ["M=25"], "ENGVAL1" => ["N=1000"], "EXTROSNB" => ["N=1000"], "FLETBV3M" => ["KAPPA=0.0,N=1000"], "FLETCBV2" => ["KAPPA=0.0,N=1000"], "FLETCBV3" => ["KAPPA=0.0,N=1000"], "FLETCHBV" => ["KAPPA=0.0,N=1000"], "FLETCHCR" => ["N=1000"], "FMINSRF2" => ["P=31"], "FMINSURF" => ["P=31"], "FREUROTH" => ["N=1000"], "GENHUMPS" => ["N=1000,ZETA=20.0"], "GENROSE" => ["N=500"], "INDEF" => ["ALPHA=0.5,N=1000"], "INDEFM" => ["ALPHA=0.5,N=1000"], "INTEQNELS" => ["N=500"], "JIMACK" => ["M=2,N=12"], "LIARWHD" => ["N=1000"], "MODBEALE" => ["ALPHA=50.0,N/2=1000"], "MOREBV" => ["N=1000"], "MSQRTALS" => ["P=70"], "MSQRTBLS" => ["P=70"], "NCB20" => ["N=1000"], "NCB20B" => ["N=1000"], "NONCVXU2" => ["N=1000"], "NONCVXUN" => ["N=1000"], "NONDIA" => ["N=1000"], "NONDQUAR" => ["N=1000"], "NONMSQRT" => ["P=70"], "OSCIGRAD" => ["N=1000,RHO=500.0"], "OSCIPATH" => ["N=500,RHO=500.0"], "PENALTY1" => ["N=1000"], "PENALTY2" => ["N=1000"], "POWELLSG" => ["N=1000"], "POWER" => ["N=1000"], "QUARTC" => ["N=1000"], "SBRYBND" => ["N=1000"], "SCHMVETT" => ["N=1000"], "SCOSINE" => ["N=1000"], "SCURLY10" => ["N=1000"], "SCURLY20" => ["N=1000"], "SCURLY30" => ["N=1000"], "SENSORS" => ["N=1000"], "SINQUAD" => ["N=1000"], "SPARSINE" => ["N=1000"], "SPARSQUR" => ["N=1000"], "SPMSRTLS" => ["M=334"], "SROSENBR" => ["N/2=250"], "SSBRYBND" => ["N=1000"], "SSCOSINE" => ["N=1000"], "TESTQUAD" => ["N=1000"], "TOINTGSS" => ["N=1000"], "TQUARTIC" => ["N=1000"], "TRIDIA" => ["ALPHA=2.0,BETA=1.0,DELTA=1.0,GAMMA=1.0,N=1000"], "VAREIGVL" => ["M=4,N=499,Q=1.5"], "WOODS" => ["NS=1000"], "YATP1LS" => ["N=50"], "YATP2LS" => ["N=50"]) +TEST = [["EXTROSNB", "N=100"]] +UNC_PROBLEMS_4to200 = [ + ["ARGLINA", "M=200,N=200"], + ["ARGLINB", "M=200,N=200"], + ["ARGLINC", "M=200,N=200"], + ["ARGTRIGLS", "N=200"], + ["ARWHEAD", "N=100"], + ["BDQRTIC", "N=100"], + ["BOX", "N=10"], + ["BOXPOWER", "N=10"], + ["BROWNAL", "N=200"], + ["BROYDN3DLS", "KAPPA1=2.0,KAPPA2=1.0,N=50"], + ["BROYDN7D", "N/2=25"], + ["BROYDNBDLS", "KAPPA1=2.0,KAPPA2=5.0,KAPPA3=1.0,LB=5,N=50,UB=1"], + ["BRYBND", "KAPPA1=2.0,KAPPA2=5.0,KAPPA3=1.0,LB=5,N=50,UB=1"], + ["CHAINWOO", "NS=1"], + ["CHNROSNB", "N=25"], + ["CHNRSNBM", "N=25"], + ["COSINE", "N=100"], + ["CRAGGLVY", "M=24"], + ["CURLY10", "N=100"], + ["CURLY20", "N=100"], + ["DIXMAANA", "M=30"], + ["DIXMAANB", "M=30"], + ["DIXMAANC", "M=30"], + ["DIXMAAND", "M=30"], + ["DIXMAANE", "M=30"], + ["DIXMAANF", "M=30"], + ["DIXMAANG", "M=30"], + ["DIXMAANH", "M=30"], + ["DIXMAANI", "M=30"], + ["DIXMAANJ", "M=30"], + ["DIXMAANK", "M=30"], + ["DIXMAANL", "M=30"], + ["DIXMAANM", "M=30"], + ["DIXMAANN", "M=30"], + ["DIXMAANO", "M=30"], + ["DIXMAANP", "M=30"], + ["DIXON3DQ", "N=100"], + ["DQDRTIC", "N=50"], + ["DQRTIC", "N=50"], + ["EDENSCH", "N=36"], + ["EIGENALS", "N=2"], + ["EIGENBLS", "N=2"], + ["EIGENCLS", "M=2"], + ["ENGVAL1", "N=50"], + ["ERRINROS", "N=25"], + ["ERRINRSM", "N=25"], + ["EXTROSNB", "N=100"], + ["FLETBV3M", "KAPPA=0.0,N=10"], + ["FLETCBV2", "KAPPA=0.0,N=10"], + ["FLETCBV3", "KAPPA=0.0,N=10"], + ["FLETCHBV", "KAPPA=0.0,N=10"], + ["FLETCHCR", "N=100"], + ["FMINSRF2", "P=4"], + ["FMINSURF", "P=4"], + ["FREUROTH", "N=50"], + ["GENHUMPS", "N=10,ZETA=20.0"], + ["GENROSE", "N=100"], + ["HILBERTA", "D=0.0,N=6"], + ["HILBERTB", "D=5.0,N=5"], + ["INDEF", "ALPHA=0.5,N=50"], + ["INDEFM", "ALPHA=0.5,N=50"], + ["INTEQNELS", "N=100"], + ["JIMACK", "M=2,N=2"], + ["LIARWHD", "N=36"], + ["MANCINO", "ALPHA=5,BETA=14.0,GAMMA=3,N=50"], + ["MODBEALE", "ALPHA=50.0,N/2=5"], + ["MOREBV", "N=50"], + ["MSQRTALS", "P=7"], + ["MSQRTBLS", "P=7"], + ["NCB20", "N=100"], + ["NCB20B", "N=180"], + ["NONCVXU2", "N=10"], + ["NONCVXUN", "N=10"], + ["NONDIA", "N=90"], + ["NONDQUAR", "N=100"], + ["NONMSQRT", "P=7"], + ["OSCIGRAD", "N=15,RHO=500.0"], + ["OSCIPATH", "N=25,RHO=500.0"], + ["PENALTY1", "N=50"], + ["PENALTY2", "N=50"], + ["PENALTY3", "N/2=25"], + ["POWELLSG", "N=60"], + ["POWER", "N=50"], + ["QUARTC", "N=100"], + ["SBRYBND", "N=50"], + ["SCHMVETT", "N=10"], + ["SCOSINE", "N=10"], + ["SCURLY10", "N=10"], + ["SENSORS", "N=10"], + ["SINQUAD", "N=50"], + ["SPARSINE", "N=50"], + ["SPARSQUR", "N=50"], + ["SPMSRTLS", "M=34"], + ["SROSENBR", "N/2=25"], + ["SSBRYBND", "N=50"], + ["SSCOSINE", "N=10"], + ["TOINTGSS", "N=50"], + ["TQUARTIC", "N=50"], + ["TRIDIA", "ALPHA=2.0,BETA=1.0,DELTA=1.0,GAMMA=1.0,N=50"], + ["VARDIM", "N=200"], + ["VAREIGVL", "M=4,N=99,Q=1.5"], + ["WATSON", "N=12"], + ["WOODS", "NS=1"], + ["YATP1LS", "N=10"], + ["YATP2LS", "N=2"] +] + +UNC_PROBLEMS_201to5000 = [ + ["ARWHEAD", "N=1000"], + ["BDQRTIC", "N=1000"], + ["BOX", "N=1000"], + ["BOXPOWER", "N=1000"], + ["BROWNAL", "N=1000"], + ["BROYDN3DLS", "KAPPA1=2.0,KAPPA2=1.0,N=1000"], + ["BROYDN7D", "N/2=250"], + ["BROYDNBDLS", + "KAPPA1=2.0,KAPPA2=5.0,KAPPA3=1.0,LB=5,N=1000,UB=1"], + ["BRYBND", "KAPPA1=2.0,KAPPA2=5.0,KAPPA3=1.0,LB=5,N=1000,UB=1"], + ["CHAINWOO", "NS=499"], + ["COSINE", "N=1000"], + ["CRAGGLVY", "M=499"], + ["CURLY10", "N=1000"], + ["CURLY20", "N=1000"], + ["CURLY30", "N=1000"], + ["DIXMAANA", "M=1000"], + ["DIXMAANB", "M=1000"], + ["DIXMAANC", "M=1000"], + ["DIXMAAND", "M=1000"], + ["DIXMAANE", "M=1000"], + ["DIXMAANF", "M=1000"], + ["DIXMAANG", "M=1000"], + ["DIXMAANH", "M=1000"], + ["DIXMAANI", "M=1000"], + ["DIXMAANJ", "M=1000"], + ["DIXMAANK", "M=1000"], + ["DIXMAANL", "M=1000"], + ["DIXMAANM", "M=1000"], + ["DIXMAANN", "M=1000"], + ["DIXMAANO", "M=1000"], + ["DIXMAANP", "M=1000"], + ["DIXON3DQ", "N=1000"], + ["DQDRTIC", "N=1000"], + ["DQRTIC", "N=1000"], + ["EDENSCH", "N=2000"], + ["EIGENALS", "N=50"], + ["EIGENBLS", "N=50"], + ["EIGENCLS", "M=25"], + ["ENGVAL1", "N=1000"], + ["EXTROSNB", "N=1000"], + ["FLETBV3M", "KAPPA=0.0,N=1000"], + ["FLETCBV2", "KAPPA=0.0,N=1000"], + ["FLETCBV3", "KAPPA=0.0,N=1000"], + ["FLETCHBV", "KAPPA=0.0,N=1000"], + ["FLETCHCR", "N=1000"], + ["FMINSRF2", "P=31"], + ["FMINSURF", "P=31"], + ["FREUROTH", "N=1000"], + ["GENHUMPS", "N=1000,ZETA=20.0"], + ["GENROSE", "N=500"], + ["INDEF", "ALPHA=0.5,N=1000"], + ["INDEFM", "ALPHA=0.5,N=1000"], + ["INTEQNELS", "N=500"], + ["JIMACK", "M=2,N=12"], + ["LIARWHD", "N=1000"], + ["MODBEALE", "ALPHA=50.0,N/2=1000"], + ["MOREBV", "N=1000"], + ["MSQRTALS", "P=70"], + ["MSQRTBLS", "P=70"], + ["NCB20", "N=1000"], + ["NCB20B", "N=1000"], + ["NONCVXU2", "N=1000"], + ["NONCVXUN", "N=1000"], + ["NONDIA", "N=1000"], + ["NONDQUAR", "N=1000"], + ["NONMSQRT", "P=70"], + ["OSCIGRAD", "N=1000,RHO=500.0"], + ["OSCIPATH", "N=500,RHO=500.0"], + ["PENALTY1", "N=1000"], + ["PENALTY2", "N=1000"], + ["POWELLSG", "N=1000"], + ["POWER", "N=1000"], + ["QUARTC", "N=1000"], + ["SBRYBND", "N=1000"], + ["SCHMVETT", "N=1000"], + ["SCOSINE", "N=1000"], + ["SCURLY10", "N=1000"], + ["SCURLY20", "N=1000"], + ["SCURLY30", "N=1000"], + ["SENSORS", "N=1000"], + ["SINQUAD", "N=1000"], + ["SPARSINE", "N=1000"], + ["SPARSQUR", "N=1000"], + ["SPMSRTLS", "M=334"], + ["SROSENBR", "N/2=250"], + ["SSBRYBND", "N=1000"], + ["SSCOSINE", "N=1000"], + ["TESTQUAD", "N=1000"], + ["TOINTGSS", "N=1000"], + ["TQUARTIC", "N=1000"], + ["TRIDIA", "ALPHA=2.0,BETA=1.0,DELTA=1.0,GAMMA=1.0,N=1000"], + ["VAREIGVL", "M=4,N=499,Q=1.5"], + ["WOODS", "NS=1000"], + ["YATP1LS", "N=50"], + ["YATP2LS", "N=50"] +] UNC_PROBLEMS_COMB = [ - UNC_PROBLEMS_4to200..., UNC_PROBLEMS_200to5000... + UNC_PROBLEMS_4to200..., UNC_PROBLEMS_201to5000... ] UNC_PROBLEMS_221104 = Dict( diff --git a/test/cutest-benchmark/test_cutest_batch.jl b/test/cutest-benchmark/test_cutest_batch.jl index 4f006d1..4016e2a 100644 --- a/test/cutest-benchmark/test_cutest_batch.jl +++ b/test/cutest-benchmark/test_cutest_batch.jl @@ -33,7 +33,7 @@ write(csvfile, join(header, ","), "\n") # todo, add field kf, kg, kH, and inner iteration # p = Progress(length(PROBLEMS); showspeed=true) for (f, param_combination) in PROBLEMS - for pc in param_combination + for pc in [param_combination][:] try nlp = CUTEstModel(f, "-param", pc) name = "$(nlp.meta.name)-$(nlp.meta.nvar)" From 03e4187d0a3d57066af40bf5985d97e6b190b394 Mon Sep 17 00:00:00 2001 From: cz Date: Fri, 29 Dec 2023 14:08:35 +0800 Subject: [PATCH 08/17] add mishchenko's method --- src/algorithms/hsodm.jl | 2 +- test/cutest-benchmark/hsodm_paper_test.jl | 180 ++++++++++++++++++++++ test/cutest-benchmark/test_setup.jl | 7 +- test/tools.jl | 2 + 4 files changed, 187 insertions(+), 4 deletions(-) diff --git a/src/algorithms/hsodm.jl b/src/algorithms/hsodm.jl index e5a2ad1..81641f1 100644 --- a/src/algorithms/hsodm.jl +++ b/src/algorithms/hsodm.jl @@ -59,7 +59,7 @@ Base.@kwdef mutable struct HSODMState{R,Tx} σ::R = 1e3 # adaptive arc style parameter σ t::R = 0.0 # running time λ₁::Float64 = 0.0 # smallest curvature if available - δ::Float64 = 1e-3 # smallest curvature if available + δ::Float64 = -1e-4 # second diagonal if available ######################################################### kₜ::Int = 1 # inner iterations kᵥ::Int = 1 # krylov iterations diff --git a/test/cutest-benchmark/hsodm_paper_test.jl b/test/cutest-benchmark/hsodm_paper_test.jl index c819ae6..f226d78 100644 --- a/test/cutest-benchmark/hsodm_paper_test.jl +++ b/test/cutest-benchmark/hsodm_paper_test.jl @@ -213,6 +213,186 @@ UNC_PROBLEMS_COMB = [ UNC_PROBLEMS_4to200..., UNC_PROBLEMS_201to5000... ] +UNC_PROBLEMS_GOOD = [ + ["ARGLINA", "M=200,N=200"], + ["ARGTRIGLS", "N=200"], + ["ARWHEAD", "N=100"], + ["ARWHEAD", "N=1000"], + ["BDQRTIC", "N=100"], + ["BDQRTIC", "N=1000"], + ["BOX", "N=10"], + ["BOX", "N=1000"], + ["BOXPOWER", "N=10"], + ["BOXPOWER", "N=1000"], + ["BROWNAL", "N=1000"], + ["BROWNAL", "N=200"], + ["BROYDN3DLS", "KAPPA1=2.0,KAPPA2=1.0,N=1000"], + ["BROYDN3DLS", "KAPPA1=2.0,KAPPA2=1.0,N=50"], + ["BROYDN7D", "N/2=25"], + ["BROYDN7D", "N/2=250"], + ["BROYDNBDLS", + "KAPPA1=2.0,KAPPA2=5.0,KAPPA3=1.0,LB=5,N=1000,UB=1"], + ["BROYDNBDLS", "KAPPA1=2.0,KAPPA2=5.0,KAPPA3=1.0,LB=5,N=50,UB=1"], + ["BRYBND", "KAPPA1=2.0,KAPPA2=5.0,KAPPA3=1.0,LB=5,N=1000,UB=1"], + ["BRYBND", "KAPPA1=2.0,KAPPA2=5.0,KAPPA3=1.0,LB=5,N=50,UB=1"], + ["CHAINWOO", "NS=1"], + ["CHAINWOO", "NS=499"], + ["CHNROSNB", "N=25"], + ["CHNRSNBM", "N=25"], + ["COSINE", "N=100"], + ["COSINE", "N=1000"], + ["CRAGGLVY", "M=24"], + ["CRAGGLVY", "M=499"], + ["CURLY10", "N=100"], + ["CURLY10", "N=1000"], + ["CURLY20", "N=100"], + ["CURLY20", "N=1000"], + ["CURLY30", "N=1000"], + ["DIXMAANA", "M=1000"], + ["DIXMAANA", "M=30"], + ["DIXMAANB", "M=1000"], + ["DIXMAANB", "M=30"], + ["DIXMAANC", "M=1000"], + ["DIXMAANC", "M=30"], + ["DIXMAAND", "M=1000"], + ["DIXMAAND", "M=30"], + ["DIXMAANE", "M=1000"], + ["DIXMAANE", "M=30"], + ["DIXMAANF", "M=1000"], + ["DIXMAANF", "M=30"], + ["DIXMAANG", "M=1000"], + ["DIXMAANG", "M=30"], + ["DIXMAANH", "M=1000"], + ["DIXMAANH", "M=30"], + ["DIXMAANI", "M=1000"], + ["DIXMAANI", "M=30"], + ["DIXMAANJ", "M=1000"], + ["DIXMAANJ", "M=30"], + ["DIXMAANK", "M=1000"], + ["DIXMAANK", "M=30"], + ["DIXMAANL", "M=1000"], + ["DIXMAANL", "M=30"], + ["DIXMAANM", "M=1000"], + ["DIXMAANM", "M=30"], + ["DIXMAANN", "M=1000"], + ["DIXMAANN", "M=30"], + ["DIXMAANO", "M=1000"], + ["DIXMAANO", "M=30"], + ["DIXMAANP", "M=1000"], + ["DIXMAANP", "M=30"], + ["DIXON3DQ", "N=100"], + ["DIXON3DQ", "N=1000"], + ["DQDRTIC", "N=1000"], + ["DQDRTIC", "N=50"], + ["DQRTIC", "N=1000"], + ["DQRTIC", "N=50"], + ["EDENSCH", "N=2000"], + ["EDENSCH", "N=36"], + ["EIGENALS", "N=2"], + ["EIGENBLS", "N=2"], + ["EIGENCLS", "M=2"], + ["ENGVAL1", "N=1000"], + ["ENGVAL1", "N=50"], + ["ERRINROS", "N=25"], + ["ERRINRSM", "N=25"], + ["EXTROSNB", "N=1000"], + ["FLETBV3M", "KAPPA=0.0,N=10"], + ["FLETBV3M", "KAPPA=0.0,N=1000"], + ["FLETCBV2", "KAPPA=0.0,N=10"], + ["FLETCBV2", "KAPPA=0.0,N=1000"], + ["FLETCBV3", "KAPPA=0.0,N=10"], + ["FLETCHBV", "KAPPA=0.0,N=10"], + ["FLETCHCR", "N=100"], + ["FLETCHCR", "N=1000"], + ["FMINSRF2", "P=31"], + ["FMINSRF2", "P=4"], + ["FMINSURF", "P=31"], + ["FMINSURF", "P=4"], + ["FREUROTH", "N=1000"], + ["FREUROTH", "N=50"], + ["GENHUMPS", "N=10,ZETA=20.0"], + ["GENHUMPS", "N=1000,ZETA=20.0"], + ["GENROSE", "N=100"], + ["GENROSE", "N=500"], + ["HILBERTA", "D=0.0,N=6"], + ["HILBERTB", "D=5.0,N=5"], + ["INDEFM", "ALPHA=0.5,N=1000"], + ["INDEFM", "ALPHA=0.5,N=50"], + ["INTEQNELS", "N=100"], + ["INTEQNELS", "N=500"], + ["JIMACK", "M=2,N=12"], + ["JIMACK", "M=2,N=2"], + ["LIARWHD", "N=1000"], + ["LIARWHD", "N=36"], + ["MANCINO", "ALPHA=5,BETA=14.0,GAMMA=3,N=50"], + ["MODBEALE", "ALPHA=50.0,N/2=1000"], + ["MODBEALE", "ALPHA=50.0,N/2=5"], + ["MOREBV", "N=1000"], + ["MOREBV", "N=50"], + ["MSQRTALS", "P=7"], + ["MSQRTBLS", "P=7"], + ["NCB20", "N=100"], + ["NCB20", "N=1000"], + ["NCB20B", "N=1000"], + ["NCB20B", "N=180"], + ["NONCVXU2", "N=10"], + ["NONCVXU2", "N=1000"], + ["NONCVXUN", "N=10"], + ["NONCVXUN", "N=1000"], + ["NONDIA", "N=1000"], + ["NONDIA", "N=90"], + ["NONDQUAR", "N=100"], + ["NONDQUAR", "N=1000"], + ["OSCIGRAD", "N=1000,RHO=500.0"], + ["OSCIPATH", "N=25,RHO=500.0"], + ["OSCIPATH", "N=500,RHO=500.0"], + ["PENALTY1", "N=1000"], + ["PENALTY1", "N=50"], + ["PENALTY2", "N=50"], + ["PENALTY3", "N/2=25"], + ["POWELLSG", "N=1000"], + ["POWELLSG", "N=60"], + ["POWER", "N=1000"], + ["POWER", "N=50"], + ["QUARTC", "N=100"], + ["QUARTC", "N=1000"], + ["SCHMVETT", "N=10"], + ["SCHMVETT", "N=1000"], + ["SCURLY10", "N=10"], + ["SCURLY10", "N=1000"], + ["SCURLY20", "N=1000"], + ["SCURLY30", "N=1000"], + ["SENSORS", "N=10"], + ["SENSORS", "N=1000"], + ["SINQUAD", "N=1000"], + ["SINQUAD", "N=50"], + ["SPARSINE", "N=1000"], + ["SPARSINE", "N=50"], + ["SPARSQUR", "N=1000"], + ["SPARSQUR", "N=50"], + ["SPMSRTLS", "M=334"], + ["SPMSRTLS", "M=34"], + ["SROSENBR", "N/2=25"], + ["SROSENBR", "N/2=250"], + ["TESTQUAD", "N=1000"], + ["TOINTGSS", "N=1000"], + ["TOINTGSS", "N=50"], + ["TQUARTIC", "N=1000"], + ["TQUARTIC", "N=50"], + ["TRIDIA", "ALPHA=2.0,BETA=1.0,DELTA=1.0,GAMMA=1.0,N=1000"], + ["TRIDIA", "ALPHA=2.0,BETA=1.0,DELTA=1.0,GAMMA=1.0,N=50"], + ["VARDIM", "N=200"], + ["VAREIGVL", "M=4,N=499,Q=1.5"], + ["VAREIGVL", "M=4,N=99,Q=1.5"], + ["WATSON", "N=12"], + ["WOODS", "NS=1"], + ["WOODS", "NS=1000"], + ["YATP1LS", "N=10"], + ["YATP1LS", "N=50"], + ["YATP2LS", "N=2"], + ["YATP2LS", "N=50"] +] + UNC_PROBLEMS_221104 = Dict( "ARGLINA" => [ "M=200,N=200", diff --git a/test/cutest-benchmark/test_setup.jl b/test/cutest-benchmark/test_setup.jl index 99fe890..a39c7e1 100644 --- a/test/cutest-benchmark/test_setup.jl +++ b/test/cutest-benchmark/test_setup.jl @@ -32,9 +32,9 @@ filter_cutest_problem(nlp) = true # filter_optimization_method(k) = k == :LBFGS # filter_optimization_method(k) = k ∈ [:LBFGS, :HSODM] # filter_optimization_method(k) = k ∈ [:DRSOM, :DRSOMHomo] -filter_optimization_method(k) = k == :HSODM +# filter_optimization_method(k) = k == :HSODM # filter_optimization_method(k) = k == :HSODMhvp -# filter_optimization_method(k) = k ∈ [:HSODMhvp, :HSODM] +filter_optimization_method(k) = k ∈ [:HSODMhvp, :HSODM] # filter_optimization_method(k) = k ∈ [:iUTRhvp] # filter_optimization_method(k) = k == :ARC # filter_optimization_method(k) = k == :TRST @@ -49,6 +49,7 @@ filter_optimization_method(k) = k == :HSODM # PROBLEMS = TEST # PROBLEMS = UNC_PROBLEMS_4to200 # PROBLEMS = UNC_PROBLEMS_200to5000 +# PROBLEMS = UNC_PROBLEMS_GOOD PROBLEMS = UNC_PROBLEMS_COMB if test_before_start @@ -117,4 +118,4 @@ if test_before_start # end finalize(nlp) end -end \ No newline at end of file +end diff --git a/test/tools.jl b/test/tools.jl index 6b7326d..757f637 100644 --- a/test/tools.jl +++ b/test/tools.jl @@ -224,6 +224,7 @@ wrapper_hsodm(x, loss, g, H, options; kwargs...) = x0=copy(x), f=loss, g=g, H=H, linesearch=:hagerzhang, direction=:warm, + adaptive=:mishchenko, options... ) alg_hsodm_hvp = HSODM() @@ -232,6 +233,7 @@ wrapper_hsodm_hvp(x, loss, g, H, options; kwargs...) = x0=copy(x), f=loss, g=g, linesearch=:hagerzhang, direction=:warm, + adaptive=:mishchenko, kwargs..., options... ) From e7787678e6c78867fb0d89d0584e0e68a019bb78 Mon Sep 17 00:00:00 2001 From: cz Date: Sun, 7 Jan 2024 16:05:45 +0800 Subject: [PATCH 09/17] update readme --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8255612..1aafb66 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DRSOM: A Dimension-Reduced Second-Order Method for Convex and Nonconvex Optimization +# DRSOM.jl: A Second-Order Optimization Package for Nonlinear Programming [![docs-stable][docs-stable-img]][docs-stable-url] [![docs-dev][docs-dev-img]][docs-dev-url] @@ -10,9 +10,10 @@ [docs-dev-url]: https://copt-public.github.io/DRSOM.jl/dev -DRSOM.jl is a Julia implementation of the Dimension-Reduced Second-Order Method for unconstrained smooth optimization. +DRSOM.jl is a Julia implementation of a few second-order optimization methods for nonlinear optimization. **DRSOM.jl now includes a bunch of other algorithms, beyond the original `DRSOM`**: +- Dimension-Reduced Second-Order Method (`DRSOM`) - Homogeneous Second-order Descent Method (`HSODM`). - Homotopy Path-Following HSODM (`PFH`) - Universal Trust-Region Method (`UTR`) @@ -20,9 +21,10 @@ DRSOM.jl is a Julia implementation of the Dimension-Reduced Second-Order Method ## Reference You are welcome to cite our papers : ) ``` -1. He, C., Jiang, Y., Zhang, C., Ge, D., Jiang, B., Ye, Y.: Homogeneous Second-Order Descent Framework: A Fast Alternative to Newton-Type Methods, http://arxiv.org/abs/2306.17516, (2023) +1. Zhang, C., Ge, D., He, C., Jiang, B., Jiang, Y., Ye, Y.: DRSOM: A Dimension Reduced Second-Order Method, http://arxiv.org/abs/2208.00208, (2022) 2. Zhang, C., Ge, D., He, C., Jiang, B., Jiang, Y., Xue, C., Ye, Y.: A Homogeneous Second-Order Descent Method for Nonconvex Optimization, http://arxiv.org/abs/2211.08212, (2022) -3. Zhang, C., Ge, D., He, C., Jiang, B., Jiang, Y., Ye, Y.: DRSOM: A Dimension Reduced Second-Order Method, http://arxiv.org/abs/2208.00208, (2022) +3. He, C., Jiang, Y., Zhang, C., Ge, D., Jiang, B., Ye, Y.: Homogeneous Second-Order Descent Framework: A Fast Alternative to Newton-Type Methods, http://arxiv.org/abs/2306.17516, (2023) +4. Jiang, Y., He, C., Zhang, C., Ge, D., Jiang, B., Ye, Y.: A Universal Trust-Region Method for Convex and Nonconvex Optimization, http://arxiv.org/abs/2311.11489, (2023) ``` ## Developer @@ -38,6 +40,6 @@ You are welcome to cite our papers : ) ## License `DRSOM.jl` is licensed under the MIT License. Check `LICENSE` for more details -## Acknowledgment +## Acknowledgement - Special thanks go to the COPT team and Tianyi Lin [(Darren)](https://tydlin.github.io/) for helpful suggestions. From 37d682534e84549b9a873d0750cd64d9283d8b25 Mon Sep 17 00:00:00 2001 From: cz Date: Wed, 14 Feb 2024 17:09:19 +0800 Subject: [PATCH 10/17] add Reg Newton inexact mode --- src/algorithms/hsodm.jl | 13 +-- src/algorithms/pfh.jl | 19 +++-- src/algorithms/utr.jl | 158 +++++++++++++++++++++++++---------- src/utilities/homogeneous.jl | 74 +++++++++++++--- test/convex/test_logistic.jl | 125 +++++++++++++++------------ 5 files changed, 270 insertions(+), 119 deletions(-) diff --git a/src/algorithms/hsodm.jl b/src/algorithms/hsodm.jl index 81641f1..3e15d08 100644 --- a/src/algorithms/hsodm.jl +++ b/src/algorithms/hsodm.jl @@ -65,6 +65,7 @@ Base.@kwdef mutable struct HSODMState{R,Tx} kᵥ::Int = 1 # krylov iterations kf::Int = 0 # function evaluations kg::Int = 0 # gradient evaluations + kgh::Int = 0 # gradient + hvp evaluations kH::Int = 0 # hessian evaluations kh::Int = 0 # hvp evaluations k₂::Int = 0 # 2nd oracle evaluations @@ -188,7 +189,7 @@ function Base.iterate(iter::HSODMIteration, state::HSODMState{R,Tx}) where {R,Tx state.dq = dq state.df = df state.d = x - z - state.kᵥ = kᵥ + state.kᵥ += kᵥ state.k₂ += k₂ state.kₜ = kₜ state.ϵ = norm(state.∇f) @@ -257,9 +258,10 @@ hsodm_stopping_criterion(tol, state::HSODMState) = function counting(iter::T, state::S) where {T<:HSODMIteration,S<:HSODMState} try state.kf = getfield(iter.f, :counter) - state.kg = getfield(iter.g, :counter) state.kH = hasproperty(iter.H, :counter) ? getfield(iter.H, :counter) : 0 state.kh = hasproperty(iter.hvp, :counter) ? getfield(iter.hvp, :counter) : 0 + state.kg = getfield(iter.g, :counter) + state.kgh = state.kg + state.kh * 2 catch end end @@ -350,11 +352,12 @@ function summarize(io::IO, k::Int, t::T, s::S) where {T<:HSODMIteration,S<:HSODM println(io, "oracle calls:") @printf io " (main) k := %d \n" k @printf io " (function) f := %d \n" s.kf - @printf io " (first-order) g(+hvp) := %d \n" s.kg + s.kh - @printf io " (first-order) hvp := %d \n" s.kh + @printf io " (first-order) g := %d \n" s.kg + @printf io " (first-order) g(+hvp) := %d \n" s.kgh @printf io " (second-order) H := %d \n" s.kH + @printf io " (second-order) hvp := %d \n" s.kh @printf io " (sub-problem) P := %d \n" s.k₂ - @printf io " (sub-problem) kₕ := %d \n" s.kᵥ + @printf io " (sub-calls) kᵥ := %d \n" s.kᵥ @printf io " (running time) t := %.3f \n" s.t println(io, "-"^length(t.LOG_SLOTS)) flush(io) diff --git a/src/algorithms/pfh.jl b/src/algorithms/pfh.jl index 6d1fa7d..65cf52d 100644 --- a/src/algorithms/pfh.jl +++ b/src/algorithms/pfh.jl @@ -75,6 +75,7 @@ Base.@kwdef mutable struct PFHState{R,Tx,Tg} ξ::Tx # eigenvector kf::Int = 0 # function evaluations kg::Int = 0 # gradient evaluations + kgh::Int = 0 # gradient + hvp evaluations kH::Int = 0 # hessian evaluations kh::Int = 0 # hvp evaluations k₂::Int = 0 # 2 oracle evaluations @@ -176,8 +177,6 @@ function Base.iterate(iter::PFHIteration, state::PFHState{R,Tx}) where {R,Tx} H, state.μ, state.∇fμ, state; verbose=iter.verbose > 1 ) else - - # println(hvp(state.x)) kᵥ, k₂, v, vn, vg, vHv = NewtonStep( iter, state.μ, state.∇fμ, state; verbose=iter.verbose > 1 ) @@ -208,14 +207,13 @@ function Base.iterate(iter::PFHIteration, state::PFHState{R,Tx}) where {R,Tx} state.dq = dq state.df = df state.d = x - z - state.kᵥ = kᵥ + state.kᵥ += kᵥ state.k₂ += k₂ state.kₜ += 1 state.ϵ = norm(state.∇f) state.ϵμ = norm(state.∇fμ) if (state.ϵμ < min(5e-1, 10 * state.μ)) || state.kₜ > 10 - # state.μ = state.μ < 2e-6 ? 0 : 0.06 * state.μ state.μ = 0.02 * state.μ state.kᵤ += 1 @@ -234,9 +232,10 @@ pfh_stopping_criterion(tol, state::PFHState) = function counting(iter::T, state::S) where {T<:PFHIteration,S<:PFHState} try state.kf = getfield(iter.f, :counter) - state.kg = getfield(iter.g, :counter) state.kH = hasproperty(iter.H, :counter) ? getfield(iter.H, :counter) : 0 - state.kh = 0 # todo, accept hvp iterative in the future + state.kh = hasproperty(iter.hvp, :counter) ? getfield(iter.hvp, :counter) : 0 + state.kg = getfield(iter.g, :counter) + state.kgh = state.kg + state.kh * 2 catch end end @@ -286,7 +285,7 @@ function (alg::IterativeAlgorithm{T,S})(; arr = Vector{S}() kwds = Dict(kwargs...) - for cf ∈ [:f :g :H] + for cf ∈ [:f :g :H :hvp] apply_counter(cf, kwds) end iter = T(; μ₀=μ₀, eigtol=eigtol, linesearch=linesearch, adaptive=adaptive, direction=direction, verbose=verbose, kwds...) @@ -323,6 +322,7 @@ function Base.show(io::IO, t::T) where {T<:PFHIteration} (t.μ₀ == 0) && @printf io " !!! μ₀ ≈ 0, reduce to inexact Newton Method\n" println(io, "-"^length(t.LOG_SLOTS)) flush(io) + end function summarize(io::IO, k::Int, t::T, s::S) where {T<:PFHIteration,S<:PFHState} @@ -334,9 +334,12 @@ function summarize(io::IO, k::Int, t::T, s::S) where {T<:PFHIteration,S<:PFHStat @printf io " (main) kᵤ := %d \n" s.kᵤ @printf io " (main-total) k := %d \n" k @printf io " (function) f := %d \n" s.kf - @printf io " (first-order) g(+hvp) := %d \n" s.kg + @printf io " (first-order) g := %d \n" s.kg + @printf io " (first-order) g(+hvp) := %d \n" s.kgh @printf io " (second-order) H := %d \n" s.kH + @printf io " (second-order) hvp := %d \n" s.kh @printf io " (sub-problem) P := %d \n" s.k₂ + @printf io " (sub-calls) kᵥ := %d \n" s.kᵥ @printf io " (running time) t := %.3f \n" s.t println(io, "-"^length(t.LOG_SLOTS)) flush(io) diff --git a/src/algorithms/utr.jl b/src/algorithms/utr.jl index 386ce6f..1823d81 100644 --- a/src/algorithms/utr.jl +++ b/src/algorithms/utr.jl @@ -18,6 +18,8 @@ Base.@kwdef mutable struct UTRIteration{Tx,Tf,Tϕ,Tg,TH,Th} hvp::Th = nothing fvp::Union{Function,Nothing} = nothing ff::Union{Function,Nothing} = nothing + fc::Union{Function,Nothing} = nothing + opH::Union{LinearOperator,Nothing} = nothing H::TH = nothing # hessian function x0::Tx # initial point t::Dates.DateTime = Dates.now() @@ -69,6 +71,7 @@ Base.@kwdef mutable struct UTRState{R,Tx} t::R = 0.0 # running time kf::Int = 0 # function evaluations kg::Int = 0 # gradient evaluations + kgh::Int = 0 # gradient + hvp evaluations kH::Int = 0 # hessian evaluations kh::Int = 0 # hvp evaluations k₂::Int = 0 # 2 oracle evaluations @@ -85,6 +88,7 @@ function Base.iterate(iter::UTRIteration) grad_f_x = iter.g(z) Hv = similar(grad_f_x) # this is a buffer for Hvp gₙ = norm(grad_f_x, 2) + n = z |> length state = UTRState( x=z, y=z, @@ -107,7 +111,13 @@ function Base.iterate(iter::UTRIteration) iter.hvp(state.x, v, Hv) return Hv end - iter.ff = hvp + iter.fc = hvp + function hvps(y, v) + iter.hvp(state.x, v, Hv) + copy!(y, Hv .+ state.σ * v) + end + iter.ff = (y, v) -> hvps(y, v) + iter.opH = LinearOperator(Float64, n, n, true, true, (y, v) -> iter.ff(y, v)) return state, state end @@ -165,6 +175,9 @@ function iterate_cholesky( σ = σ₀ / 2 r = 1 / 3 / σ₀ σ₀ = σ₀ * grad_regularizer + elseif iter.initializerule == :unscaled + σ = iter.σ₀ + r = max(state.r, 1e-1) else σ = σ₀ = state.σ r = max(state.r, 1e-1) @@ -252,6 +265,7 @@ function iterate_evolve_lanczos( iter::UTRIteration, state::UTRState{R,Tx}; ) where {R,Tx} + @debug "start @" Dates.now() state.z = z = state.x state.fz = fz = state.fx state.∇fz = state.∇f @@ -263,55 +277,98 @@ function iterate_evolve_lanczos( n = (state.x |> length) k₂ = 0 - γ₁ = 8.0 + γ₁ = 2.0 γ₂ = 2.0 η = 1.0 ρ = 1.0 - σ = state.σ * grad_regularizer - Δ = max(state.r * grad_regularizer, 1e-1) - + # initialize an estimation + if iter.initializerule == :mishchenko + throw(ErrorException(""" + not implemented + """ + )) + σ₀, _, Df, bool_acc = initial_rules_mishchenko( + state, iter.g, H, iter.H, + state.σ + ) + σ = σ₀ / 2 + r = 1 / 3 / σ₀ + σ₀ = σ₀ * grad_regularizer + σ = σ * grad_regularizer + state.σ = σ + elseif iter.initializerule == :unscaled + state.σ = σ = iter.σ₀ + r = max(state.r, 1e-1) + else + σ = σ₀ = state.σ + σ = σ * grad_regularizer + state.σ = σ + r = max(state.r, 1e-1) + end + @debug "initialization @" Dates.now() + Δ = r * grad_regularizer Df = (η / ρ) * decs_regularizer # initialize n = state.∇f |> length - Sₗ = DRSOM.Lanczos(n, 2n + 1, state.∇f) + θ = λ₁ = 0.0 while true - if iter.mainstrategy == :newton - throw(ErrorException("we do not support regularized Newton in Lanczos mode; use Cholesky instead")) - end + # use evolving subspaces state.α = 1.0 k₁ = (n <= 500) ? round(Int, n * 0.9) : round(Int, n * 0.02) Ξ = 1e-1 * min(state.∇f |> norm |> sqrt, 1e0) - @debug "minimum subspace size $k₁" - if iter.hvp === nothing - H = iter.H(state.x) - v, θ, info = DRSOM.InexactLanczosTrustRegionBisect( - H, - -state.∇f, - Δ, - Sₗ; - σ=σ * (σ >= 1e-8), - k=Sₗ.k, - k₁=k₁, - Ξ=Ξ - ) - dq = -state.α^2 * v'H * v / 2 - state.α * v'state.∇f + @debug "minimum subspace size $k₁ @" Dates.now() + + if iter.mainstrategy == :newton + # throw(ErrorException( + # "we do not support regularized Newton in Lanczos mode yet; use Cholesky instead" + # )) + # todo + if iter.hvp === nothing + H = iter.H(state.x) + kᵥ, k₂, v, vn, vg, vHv = NewtonStep( + H, state.σ, state.∇f, state; verbose=iter.verbose > 1 + ) + else + kᵥ, k₂, v, vn, vg, vHv = NewtonStep( + iter, state.σ, state.∇f, state; verbose=iter.verbose > 1 + ) + end + dq = -state.α^2 * vHv / 2 - state.α * vg + # @printf "NewtonStep: %d %d %e %e %e %e\n" kᵥ k₂ vn vg vHv dq else - v, θ, info = DRSOM.InexactLanczosTrustRegionBisect( - iter.ff, - -state.∇f, - Δ, - Sₗ; - σ=σ * (σ >= 1e-8), - k=Sₗ.k, - k₁=k₁, - Ξ=Ξ - ) - dq = -state.α^2 * v'iter.ff(v) / 2 - state.α * v'state.∇f + Sₗ = DRSOM.Lanczos(n, 2n + 1, state.∇f) + if iter.hvp === nothing + H = iter.H(state.x) + v, θ, info = DRSOM.InexactLanczosTrustRegionBisect( + H, + -state.∇f, + Δ, + Sₗ; + σ=σ * (σ >= 1e-8), + k=Sₗ.k, + k₁=k₁, + Ξ=Ξ + ) + dq = -state.α^2 * v'H * v / 2 - state.α * v'state.∇f + else + v, θ, info = DRSOM.InexactLanczosTrustRegionBisect( + iter.fc, + -state.∇f, + Δ, + Sₗ; + σ=σ * (σ >= 1e-8), + k=Sₗ.k, + k₁=k₁, + Ξ=Ξ + ) + dq = -state.α^2 * v'iter.fc(v) / 2 - state.α * v'state.∇f + end + Δ = min(Δ, v |> norm) + λ₁ = info.λ₁ + kᵥ = info.kₗ end - - λ₁ = info.λ₁ fx = iter.f(state.z + v * state.α) # summarize x = y = state.z + v * state.α @@ -325,17 +382,30 @@ function iterate_evolve_lanczos( σ: $σ, θ: $θ, λ₁: $λ₁, - kᵢ: $info.kₗ, k₁: $k₁, + kᵢ: $kᵥ, k₁: $k₁, df: $df, ρₐ: $ρₐ """ - Δ = min(Δ, v |> norm) - Δ, σ, Df, bool_acc = adaptive_rules_utr(state, df, Df, Δ, σ, ρₐ, γ₁, γ₂, θ, λ₁) + if iter.adaptiverule == :utr + Δ, σ, Df, bool_acc = adaptive_rules_utr( + state, df, + Df, Δ, σ, ρₐ, γ₁, γ₂, + θ, λ₁ - σ + ) + elseif iter.adaptiverule ∈ [:constant, :mishchenko] + # do nothing + bool_acc = true + else + throw( + ErrorException("unrecognized adaptive mode $(iter.adaptiverule)") + ) + end !bool_acc && continue # do this when accept state.σ = max(1e-18, σ / grad_regularizer) state.r = max(Δ / grad_regularizer, 1e-1) state.k₂ += k₂ + state.kᵥ += kᵥ state.kₜ = k₂ state.x = x state.y = y @@ -423,9 +493,10 @@ utr_stopping_criterion(tol, state::UTRState) = function counting(iter::T, state::S) where {T<:UTRIteration,S<:UTRState} try state.kf = getfield(iter.f, :counter) - state.kg = getfield(iter.g, :counter) state.kH = hasproperty(iter.H, :counter) ? getfield(iter.H, :counter) : 0 state.kh = hasproperty(iter.hvp, :counter) ? getfield(iter.hvp, :counter) : 0 + state.kg = getfield(iter.g, :counter) + state.kgh = state.kg + state.kh * 2 catch end end @@ -514,6 +585,7 @@ function Base.show(io::IO, t::T) where {T<:UTRIteration} (t.mainstrategy == :newton) && @printf io " !!! reduce to Regularized Newton Method\n" (t.initializerule == :mishchenko) && @printf io " !!! - use Mishchenko's strategy\n" + (t.initializerule == :unscaled) && @printf io " !!! - use regularization without gradient norm\n" (t.adaptiverule == :constant) && @printf io " !!! - use fixed regularization\n" println(io, "-"^length(t.LOG_SLOTS)) flush(io) @@ -527,10 +599,12 @@ function summarize(io::IO, k::Int, t::T, s::S) where {T<:UTRIteration,S<:UTRStat println(io, "oracle calls:") @printf io " (main) k := %d \n" s.k @printf io " (function) f := %d \n" s.kf - @printf io " (first-order) g(+hvp) := %d \n" s.kg + s.kh - @printf io " (first-order) hvp := %d \n" s.kh + @printf io " (first-order) g := %d \n" s.kg + @printf io " (first-order) g(+hvp) := %d \n" s.kgh @printf io " (second-order) H := %d \n" s.kH + @printf io " (second-order) hvp := %d \n" s.kh @printf io " (sub-problem) P := %d \n" s.k₂ + @printf io " (sub-calls) kᵥ := %d \n" s.kᵥ @printf io " (running time) t := %.3f \n" s.t println(io, "-"^length(t.LOG_SLOTS)) flush(io) diff --git a/src/utilities/homogeneous.jl b/src/utilities/homogeneous.jl index 613d966..5cdecff 100644 --- a/src/utilities/homogeneous.jl +++ b/src/utilities/homogeneous.jl @@ -1,12 +1,13 @@ # beta: adaptive hsodm subproblem -# Base.@kwdef mutable struct AdaptiveHomogeneousSubproblem - +using Dates using Roots using SparseArrays using ArnoldiMethod using LDLFactorizations -using Krylov + +# using Krylov +using KrylovKit using LinearOperators @@ -325,16 +326,65 @@ d, __unused_info = KrylovKit.linsolve( """ function NewtonStep(iter::I, μ, g, state; verbose::Bool=false ) where {I} + @debug "started Newton-step @" Dates.now() + n = g |> length opH = LinearOperator(Float64, n, n, true, true, (y, v) -> iter.ff(y, v)) - d, __unused_info = cg(opH, -Vector(g); verbose=0, atol=state.ϵ > 1e-4 ? 1e-7 : iter.eigtol, itmax=200) - # if norm(d) < 1e-8 - # f(v) = iter.ff(state.∇fb, v) - # d, __unused_info = KrylovKit.linsolve( - # f, -Vector(g); - # isposdef=true, issymmetric=true - # ) - # end - return 1, 1, d, norm(d), d' * state.∇f, d' * state.∇fb + d, _info = cg( + opH, -Vector(g); + # rtol=state.ϵ > 1e-4 ? 1e-7 : iter.eigtol, + rtol=(g |> norm) * 1e-4, + itmax=200, + verbose=verbose ? 3 : 0 + ) + + return _info.niter, 1, d, norm(d), d' * state.∇f, d' * state.∇fb + # f(v) = iter.ff(state.∇fb, v) + # d, _info = KrylovKit.linsolve( + # f, -Vector(g), -Vector(g), + # # isposdef=true, + # # issymmetric=true, + # # rtol=state.ϵ > 1e-4 ? 1e-7 : iter.eigtol, + # # maxiter=200, + # # verbosity=3 + # CG(; + # # rtol=state.ϵ > 1e-4 ? 1e-7 : iter.eigtol, + # tol=1e-4 * (g |> norm), + # maxiter=n * 2, + # verbosity=verbose ? 3 : 0 + # ), + # ) + # return _info.numops, 1, d, norm(d), d' * state.∇f, d' * state.∇fb +end + +function NewtonStep(opH::LinearOperator, g, state; verbose::Bool=false) + @debug "started Newton-step @" Dates.now() + n = g |> length + gn = (g |> norm) + d, _info = cg( + opH, -Vector(g); + # rtol=state.ϵ > 1e-4 ? 1e-7 : iter.eigtol, + rtol=state.ϵ > 1e-4 ? gn * 1e-4 : gn * 1e-5, + itmax=200, + verbose=verbose ? 3 : 0 + ) + + return _info.niter, 1, d, norm(d), d' * state.∇f, d' * state.∇fb + # f(v) = iter.ff(state.∇fb, v) + # d, _info = KrylovKit.linsolve( + # f, -Vector(g), -Vector(g), + # # isposdef=true, + # # issymmetric=true, + # # rtol=state.ϵ > 1e-4 ? 1e-7 : iter.eigtol, + # # maxiter=200, + # # verbosity=3 + # CG(; + # # rtol=state.ϵ > 1e-4 ? 1e-7 : iter.eigtol, + # tol=1e-4*(g|>norm), + # maxiter=n*2, + # verbosity=3 + # ), + # ) + end diff --git a/test/convex/test_logistic.jl b/test/convex/test_logistic.jl index eb5d1e9..a2682ff 100644 --- a/test/convex/test_logistic.jl +++ b/test/convex/test_logistic.jl @@ -44,15 +44,15 @@ bool_plot = true bool_q_preprocessed = true f1(A, d=2) = sqrt.(sum(abs2.(A), dims=d)) -ε = 1e-6 # * max(g(x0) |> norm, 1) +ε = 2e-8 # * max(g(x0) |> norm, 1) λ = 1e-5 if bool_q_preprocessed # name = "a4a" # name = "a9a" # name = "w4a" # name = "covtype" - # name = "news20" - name = "rcv1" + name = "news20" + # name = "rcv1" X, y = libsvmread("test/instances/libsvm/$name.libsvm"; dense=false) Xv = hcat(X...)' @@ -178,88 +178,101 @@ if bool_opt time_limit=500 ) - rn1 = PFH(name=Symbol("iNewton-1e-7"))(; + rn1 = PFH(name=Symbol("iNewton"))(; x0=copy(x0), f=loss, g=g, hvp=hvpdiff, - maxiter=40, tol=ε, freq=1, + maxiter=300, tol=ε, freq=1, step=:newton, μ₀=0.0, maxtime=500, linesearch=:backtrack, bool_trace=true, - eigtol=1e-7, direction=:warm ) - # rn2 = PFH(name=Symbol("iNewton-1e-8"))(; - # x0=copy(x0), f=loss, g=g, hvp=hvpdiff, - # maxiter=60, tol=ε, freq=1, - # step=:newton, μ₀=0.0, - # maxtime=500, linesearch=:backtrack, - # bool_trace=true, - # eigtol=1e-8, - # direction=:warm - # ) - # rn3 = PFH(name=Symbol("iNewton-1e-9"))(; - # x0=copy(x0), f=loss, g=g, hvp=hvpdiff, - # maxiter=60, tol=ε, freq=1, - # step=:newton, μ₀=0.0, - # maxtime=500, linesearch=:backtrack, - # bool_trace=true, - # eigtol=1e-9, - # direction=:warm - # ) - # ra = HSODM(name=Symbol("adaptive-HSODM"))(; - # x0=copy(x0), f=loss, g=g, hvp=hvpdiff, - # maxiter=10000, tol=ε, freq=1, - # maxtime=500, - # direction=:warm, linesearch=:hagerzhang, - # adaptive=:none - # ) + rn2 = UTR(name=Symbol("iNewton-Grad-1e-4"))(; + x0=copy(x0), f=loss, g=g, hvp=hvpdiff, + maxiter=300, tol=ε, freq=1, + bool_trace=true, + subpstrategy=:lanczos, + mainstrategy=:newton, + initializerule=:unscaled, + adaptiverule=:constant, + σ₀=1e-4, + maxtime=1500 + ) + + rn3 = UTR(name=Symbol("iNewton-Grad-1e-3"))(; + x0=copy(x0), f=loss, g=g, hvp=hvpdiff, + maxiter=300, tol=ε, freq=1, + bool_trace=true, + subpstrategy=:lanczos, + mainstrategy=:newton, + initializerule=:undef, + adaptiverule=:constant, + σ₀=1e-3, + maxtime=1500 + ) + + ra = HSODM(name=Symbol("Adaptive-HSODM"))(; + x0=copy(x0), f=loss, g=g, hvp=hvpdiff, + maxiter=10000, tol=ε, freq=1, + direction=:warm, linesearch=:hagerzhang, + adaptive=:none, + maxtime=1500 + ) rh = PFH(name=Symbol("PF-HSODM"))(; x0=copy(x0), f=loss, g=g, hvp=hvpdiff, maxiter=10000, tol=ε, freq=1, step=:hsodm, μ₀=5e-2, bool_trace=true, + direction=:warm, maxtime=500, - direction=:warm ) - ru = UTR(name=Symbol("Universal-TRS"))(; - x0=copy(x0), f=loss, g=g, H=H, - maxiter=10000, tol=1e-6, freq=1, - direction=:warm, subpstrategy=:lanczos - ) + # ru = UTR(name=Symbol("Universal-TRS"))(; + # x0=copy(x0), f=loss, g=g, H=H, + # maxiter=10000, tol=1e-6, freq=1, + # direction=:warm, subpstrategy=:lanczos + # ) end if bool_plot results = [ - # optim_to_result(res1, "GD+Wolfe"), - # optim_to_result(r_lbfgs, "LBFGS+Wolfe"), ra, rh, rn1, rn2, rn3, - ru + # ru ] - linestyles = [:dash, :dot, :dashdot, :dashdotdot] method_names = [ L"\texttt{Adaptive-HSODM}", L"\texttt{Homotopy-HSODM}", - L"\texttt{iNewton}-$10^{-7}$", - L"\texttt{iNewton}-$10^{-8}$", - L"\texttt{iNewton}-$10^{-9}$", - L"\texttt{Universal-TR}" + L"\texttt{iNewton}", + L"\texttt{iNewton-Grad-1e-4}", + L"\texttt{iNewton-Grad-1e-3}", + # L"\texttt{Universal-TR}" ] - for xaxis in (:t, :k) - for metric in (:ϵ, :fx) + # for xaxis in (:t, :k, :kg) + for xaxis in (:t, :k, :kgh) + # for metric in (:ϵ, :fx) + for metric in [:ϵ] @printf("plotting results\n") pgfplotsx() title = L"Logistic Regression $\texttt{name}:=\texttt{%$(name)}$, $n:=$%$(n), $N:=$%$(N)" + if xaxis == :t + xtitle = L"$\textrm{Time}$" + elseif xaxis == :k + xtitle = L"$\textrm{Iterations}$" + else + xtitle = L"$\textrm{Grad-Evaluations}$" + end fig = plot( - xlabel=xaxis == :t ? L"\textrm{Running Time (s)}" : L"\textrm{Iterations}", + xlabel=xtitle, ylabel=metric == :ϵ ? L"\|\nabla f\|" : L"f(x)", + yaxis=(formatter = y -> @sprintf("%.1e", y)), + # formatter=:scientific, title=title, size=(600, 500), yticks=[1e-12, 1e-10, 1e-8, 1e-6, 1e-4, 1e-2, 1e-1, 1e0, 1e1], @@ -278,8 +291,16 @@ if bool_plot ) for (k, rv) in enumerate(results) yv = getresultfield(rv, metric) + if xaxis == :t + xa = getresultfield(rv, :t) + elseif xaxis == :k + xa = 1:(yv|>length) + else + xa = getresultfield(rv, :kgh) + end + plot!(fig, - xaxis == :t ? getresultfield(rv, :t) : (1:(yv|>length)), + xa, yv, label=method_names[k], linewidth=1.5, @@ -290,9 +311,9 @@ if bool_plot # seriescolor=colors[k] ) end - savefig(fig, "/tmp/$metric-logistic-$name-$xaxis.tex") - savefig(fig, "/tmp/$metric-logistic-$name-$xaxis.pdf") - savefig(fig, "/tmp/$metric-logistic-$name-$xaxis.png") + # savefig(fig, "./$metric-logistic-$name-$xaxis.tex") + savefig(fig, "./$metric-logistic-$name-$xaxis.pdf") + savefig(fig, "./$metric-logistic-$name-$xaxis.png") end end end From 0cb8efbb592dafd5e7400a0f2bd560096526e4ee Mon Sep 17 00:00:00 2001 From: cz Date: Thu, 14 Mar 2024 03:09:49 +0800 Subject: [PATCH 11/17] 0313 --- Project.toml | 1 + src/algorithms/hsodm.jl | 5 +- src/utilities/backtracking.jl | 3 +- src/utilities/homogeneous.jl | 21 +- src/utilities/linesearches.jl | 5 +- test/convex/test_hilbert.jl | 43 +-- test/convex/test_linear_system.jl | 190 ++++++------ test/convex/test_logistic.jl | 83 ++--- test/cutest-benchmark/hsodm_paper_test.jl | 73 +++++ test/cutest-benchmark/test_cutest_batch.jl | 3 +- test/cutest-benchmark/test_setup.jl | 7 +- test/tools.jl | 340 +++++++++++---------- 12 files changed, 415 insertions(+), 359 deletions(-) diff --git a/Project.toml b/Project.toml index cdb9337..5d92e7e 100644 --- a/Project.toml +++ b/Project.toml @@ -20,6 +20,7 @@ LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36" LoggingFormats = "98105f81-4425-4516-93fd-1664fb551ab6" +NaNMath = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/src/algorithms/hsodm.jl b/src/algorithms/hsodm.jl index 3e15d08..42b4ec2 100644 --- a/src/algorithms/hsodm.jl +++ b/src/algorithms/hsodm.jl @@ -22,7 +22,7 @@ Base.@kwdef mutable struct HSODMIteration{Tx,Tf,Tϕ,Tg,TH,Th} x0::Tx # initial point t::Dates.DateTime = Dates.now() adaptive_param = AR() # todo - eigtol::Float64 = 1e-12 + eigtol::Float64 = 1e-8 itermax::Int64 = 20 direction = :warm linesearch = :hagerzhang @@ -144,10 +144,11 @@ function Base.iterate(iter::HSODMIteration, state::HSODMState{R,Tx}) where {R,Tx state.α, fx, kₜ = HagerZhangLineSearch(iter, state.∇f, state.fx, x, s) end if (state.α == 0) || (iter.linesearch == :trustregion) + # use a trust-region-style line-search algorithm state.α, fx, kₜ = TRStyleLineSearch(iter, state.z, v, vHv, vg, 4 * state.Δ / vn) end if (state.α == 0) || (iter.linesearch == :backtrack) - # use Hager-Zhang line-search algorithm + # use backtrack line-search algorithm s = v x = state.x state.α, fx, kₜ = BacktrackLineSearch(iter, state.∇f, state.fx, x, s) diff --git a/src/utilities/backtracking.jl b/src/utilities/backtracking.jl index 5a44b86..4fd1149 100644 --- a/src/utilities/backtracking.jl +++ b/src/utilities/backtracking.jl @@ -9,6 +9,7 @@ there exists a factor ρ = ρ(c₁) such that α' ≦ ρ α. This is a modification of the algorithm described in Nocedal Wright (2nd ed), Sec. 3.5. """ +using NaNMath using Parameters @with_kw struct BackTrackingEx{TF,TI} c_1::TF = 1e-4 @@ -116,5 +117,5 @@ function (ls::BackTrackingEx)(ϕ, αinitial::Tα, ϕ_0, dϕ_0) where {Tα} ϕx_0, ϕx_1 = ϕx_1, ϕ(α_2) end - return α_2, ϕx_1 + return α_2, ϕx_1, iteration end \ No newline at end of file diff --git a/src/utilities/homogeneous.jl b/src/utilities/homogeneous.jl index 5cdecff..f644ef2 100644 --- a/src/utilities/homogeneous.jl +++ b/src/utilities/homogeneous.jl @@ -259,22 +259,24 @@ function eigenvalue( ) where {F<:Union{SparseMatrixCSC{Float64,Int64},Matrix{Float64}}} n = length(state.x) + _reltol = state.ϵ > 1e-4 ? 1e-5 : iter.eigtol + _tol = _reltol # * state.ϵ if bg == :krylov if iter.direction == :cold - vals, vecs, info = KrylovKit.eigsolve(B, n + 1, 1, :SR, Float64; tol=state.ϵ > 1e-4 ? 1e-6 : iter.eigtol, issymmetric=true, eager=true) + vals, vecs, info = KrylovKit.eigsolve(B, n + 1, 1, :SR, Float64; tol=_tol, issymmetric=true, eager=true) else - vals, vecs, info = KrylovKit.eigsolve(B, state.ξ, 1, :SR; tol=state.ϵ > 1e-4 ? 1e-6 : iter.eigtol, issymmetric=true, eager=true) + vals, vecs, info = KrylovKit.eigsolve(B, state.ξ, 1, :SR; tol=_tol, issymmetric=true, eager=true) end return vals[1], vecs[1], info end if bg == :arnoldi try # arnoldi is not stable? - decomp, history = ArnoldiMethod.partialschur(B, nev=1, restarts=50000, tol=state.ϵ > 1e-4 ? 1e-6 : iter.eigtol, which=SR()) + decomp, history = ArnoldiMethod.partialschur(B, nev=1, restarts=50000, tol=_reltol, which=SR()) vals, vecs = partialeigen(decomp) return vals[1], vecs, ArnoldiInfo(numops=1) catch - vals, vecs, info = KrylovKit.eigsolve(B, n + 1, 1, :SR, Float64; tol=state.ϵ > 1e-4 ? 1e-6 : iter.eigtol, issymmetric=true, eager=true) + vals, vecs, info = KrylovKit.eigsolve(B, n + 1, 1, :SR, Float64; tol=_tol, issymmetric=true, eager=true) return vals[1], vecs[1], info end end @@ -282,12 +284,13 @@ end function eigenvalue(f::Function, iter, state; bg=:krylov) + _tol = (state.ϵ > 1e-4 ? 1e-5 : iter.eigtol) # * state.ϵ if bg == :krylov if iter.direction == :cold n = length(state.x) - vals, vecs, info = KrylovKit.eigsolve(f, n + 1, 1, :SR, Float64; tol=state.ϵ > 1e-4 ? 1e-6 : iter.eigtol, issymmetric=true, eager=true) + vals, vecs, info = KrylovKit.eigsolve(f, n + 1, 1, :SR, Float64; tol=_tol, issymmetric=true, eager=true) else - vals, vecs, info = KrylovKit.eigsolve(f, state.ξ, 1, :SR; tol=state.ϵ > 1e-4 ? 1e-6 : iter.eigtol, issymmetric=true, eager=true) + vals, vecs, info = KrylovKit.eigsolve(f, state.ξ, 1, :SR; tol=_tol, issymmetric=true, eager=true) end end return vals, vecs, info @@ -329,11 +332,12 @@ function NewtonStep(iter::I, μ, g, state; verbose::Bool=false @debug "started Newton-step @" Dates.now() n = g |> length + gn = (g |> norm) opH = LinearOperator(Float64, n, n, true, true, (y, v) -> iter.ff(y, v)) d, _info = cg( opH, -Vector(g); # rtol=state.ϵ > 1e-4 ? 1e-7 : iter.eigtol, - rtol=(g |> norm) * 1e-4, + rtol=state.ϵ > 1e-4 ? gn * 1e-4 : gn * 1e-6, itmax=200, verbose=verbose ? 3 : 0 ) @@ -363,8 +367,7 @@ function NewtonStep(opH::LinearOperator, g, state; verbose::Bool=false) gn = (g |> norm) d, _info = cg( opH, -Vector(g); - # rtol=state.ϵ > 1e-4 ? 1e-7 : iter.eigtol, - rtol=state.ϵ > 1e-4 ? gn * 1e-4 : gn * 1e-5, + rtol=state.ϵ > 1e-4 ? gn * 1e-4 : gn * 1e-6, itmax=200, verbose=verbose ? 3 : 0 ) diff --git a/src/utilities/linesearches.jl b/src/utilities/linesearches.jl index 8268c5a..78f8f43 100644 --- a/src/utilities/linesearches.jl +++ b/src/utilities/linesearches.jl @@ -65,9 +65,10 @@ function BacktrackLineSearch( dϕ_0 = dot(s, gx) try - α, fx, it = lsb(ϕ, dϕ, ϕdϕ, 1.0, fx, dϕ_0) + α, fx, it = lsb(ϕ, dϕ, ϕdϕ, 4.0, fx, dϕ_0) return α, fx, it catch y + throw(y) isa(y, LineSearchException) # && println() # todo return 0.1, fx, 1 @@ -98,7 +99,7 @@ function BacktrackLineSearch( dϕ_0 = dot(s, gx) try - α, fx = lsb(ϕ, dϕ, ϕdϕ, 1.0, fx, dϕ_0) + α, fx = lsb(ϕ, dϕ, ϕdϕ, 4.0, fx, dϕ_0) return α, fx catch y # throw(y) diff --git a/test/convex/test_hilbert.jl b/test/convex/test_hilbert.jl index 8b394c7..e97e6e5 100644 --- a/test/convex/test_hilbert.jl +++ b/test/convex/test_hilbert.jl @@ -44,53 +44,57 @@ Random.seed!(1) H = Hilbert(n) +# H = rand(Float64, n, n) +# H = H'H + 1e-6 * I g = rand(Float64, n) for δ in [1e-5, 1e-7, 1e-9, 1e-10] r = Dict() # - r["GHM-Lanczos"] = KrylovInfo(normres=0.0, numops=0) - r["Newton-CG"] = KrylovInfo(normres=0.0, numops=0) - r["Newton-GMRES"] = KrylovInfo(normres=0.0, numops=0) - r["Newton-rGMRES"] = KrylovInfo(normres=0.0, numops=0) + r["GHM (Lanczos)"] = KrylovInfo(normres=0.0, numops=0) + r["CG"] = KrylovInfo(normres=0.0, numops=0) + r["GMRES"] = KrylovInfo(normres=0.0, numops=0) + r["rGMRES"] = KrylovInfo(normres=0.0, numops=0) samples = 5 κ = LinearAlgebra.cond(H + δ .* LinearAlgebra.I) for idx in 1:samples w₀ = rand(Float64, n) - Fw(w) = [H * w[1:end-1] + g .* w[end]; w[1:end-1]' * g + δ * w[end]] + Fw(w) = [H * w[1:end-1] + g .* w[end]; w[1:end-1]' * g - δ * w[end]] Hw(w) = (H + δ .* LinearAlgebra.I) * w Fc = DRSOM.Counting(Fw) Hc = DRSOM.Counting(Hw) @info "data reading finished" max_iteration = 200 - ε = 1e-6 + ε = 1e-7 + gₙ = g |> norm + εₙ = 1e-4 * gₙ rl = KrylovKit.eigsolve( - Fc, [w₀; 1], 1, :SR, Lanczos(tol=ε / 10, maxiter=max_iteration, verbosity=3, eager=true); + Fc, [w₀; 1], 1, :SR, Lanczos(tol=ε, maxiter=max_iteration, verbosity=3, eager=true); ) λ₁ = rl[1] ξ₁ = rl[2][1] - r["GHM-Lanczos"].normres += (Fc(ξ₁) - λ₁ .* ξ₁) |> norm - r["GHM-Lanczos"].numops += rl[end].numops + r["GHM (Lanczos)"].normres += (Fc(ξ₁) - λ₁ .* ξ₁) |> norm + r["GHM (Lanczos)"].numops += rl[end].numops rl = KrylovKit.linsolve( - Hc, -g, w₀, CG(; tol=ε, maxiter=max_iteration, verbosity=3); + Hc, -g, w₀, CG(; tol=εₙ, maxiter=max_iteration, verbosity=3); ) - r["Newton-CG"].normres += ((Hw(rl[1]) + g) |> norm) - r["Newton-CG"].numops += rl[end].numops + r["CG"].normres += ((Hw(rl[1]) + g) |> norm) + r["CG"].numops += rl[end].numops rl = KrylovKit.linsolve( - Hc, -g, w₀, GMRES(; tol=ε, maxiter=1, krylovdim=max_iteration, verbosity=3); + Hc, -g, w₀, GMRES(; tol=εₙ, maxiter=1, krylovdim=max_iteration, verbosity=3); ) - r["Newton-GMRES"].normres += ((Hw(rl[1]) + g) |> norm) - r["Newton-GMRES"].numops += rl[end].numops + r["GMRES"].normres += ((Hw(rl[1]) + g) |> norm) + r["GMRES"].numops += rl[end].numops rl = KrylovKit.linsolve( - Hc, -g, w₀, GMRES(; tol=ε, maxiter=4, krylovdim=div(max_iteration, 4), verbosity=3); + Hc, -g, w₀, GMRES(; tol=εₙ, maxiter=4, krylovdim=div(max_iteration, 4), verbosity=3); ) - r["Newton-rGMRES"].normres += ((Hw(rl[1]) + g) |> norm) - r["Newton-rGMRES"].numops += rl[end].numops + r["rGMRES"].normres += ((Hw(rl[1]) + g) |> norm) + r["rGMRES"].numops += rl[end].numops end for (k, v) in r @@ -126,7 +130,8 @@ fig = groupedbar( bar_position=:dodge, group=df.kappa, palette=:Paired_8, - xlabel="Method", + # xlabel="Method", + leg=:topright, legendfontsize=14, labelfontsize=14, ylabel=L"Krylov Iterations: $K$" diff --git a/test/convex/test_linear_system.jl b/test/convex/test_linear_system.jl index 2fd8c4d..de9ec7a 100644 --- a/test/convex/test_linear_system.jl +++ b/test/convex/test_linear_system.jl @@ -44,107 +44,109 @@ name = "a4a" # name = "news20" # name = "rcv1" # names = ["a4a"] #, "a9a", "w4a", "covtype", "rcv1", "news20"] -names = ["covtype"] #, "a9a", "w4a", "covtype", "rcv1", "news20"] -# names = ["a4a", "a9a", "w4a", "covtype", "rcv1"] +# names = ["covtype"] #, "a9a", "w4a", "covtype", "rcv1", "news20"] +names = ["a4a", "a9a", "w4a", "covtype", "rcv1"] @warn "news20 is very big..., consider run on a server" # use the data matrix of libsvm f1(A, d=2) = sqrt.(sum(abs2.(A), dims=d)) Random.seed!(1) -# for name in names -@info "run $name" -X, y = libsvmread("test/instances/libsvm/$name.libsvm"; dense=false) -Xv = hcat(X...)' -Rc = 1 ./ f1(Xv)[:] -Xv = (Rc |> Diagonal) * Xv -if name in ["covtype"] - y = convert(Vector{Float64}, (y .- 1.5) * 2) -else +for name in names + @info "run $name" + X, y = libsvmread("test/instances/libsvm/$name.libsvm"; dense=false) + Xv = hcat(X...)' + Rc = 1 ./ f1(Xv)[:] + Xv = (Rc |> Diagonal) * Xv + if name in ["covtype"] + y = convert(Vector{Float64}, (y .- 1.5) * 2) + else + end + + γ = 1e-4 + n = Xv[1, :] |> length + Random.seed!(1) + N = y |> length + + Q = Xv' * Xv + function gfs(w) + return (Q * w - Xv' * y) / N + γ * w + end + r = Dict() + # + r["GHM-Lanczos"] = KrylovInfo(normres=0.0, numops=0) + r["Newton-CG"] = KrylovInfo(normres=0.0, numops=0) + r["Newton-GMRES"] = KrylovInfo(normres=0.0, numops=0) + r["Newton-rGMRES"] = KrylovInfo(normres=0.0, numops=0) + + samples = 5 + for idx in 1:samples + w₀ = rand(Float64, n) + g = gfs(w₀) + δ = -0.1 + ϵᵧ = 1e-5 + function hvp(w) + gn = gfs(w₀ + ϵᵧ .* w) + gf = g + return (gn - gf) / ϵᵧ + end + function hvp!(y, w) + gn = gfs(w₀ + ϵᵧ .* w) + gf = g + return copyto!(y, (gn - gf) / ϵᵧ) + end + Fw(w) = [hvp(w[1:end-1]) + g .* w[end]; w[1:end-1]' * g + δ * w[end]] + Fc = DRSOM.Counting(Fw) + Hc = DRSOM.Counting(hvp) + @info "data reading finished" + + max_iteration = 200 + ε = 1e-7 + gₙ = g |> norm + εₙ = 1e-4 * gₙ + + rl = KrylovKit.eigsolve( + Fc, [w₀; 1], 1, :SR, Lanczos(tol=ε, maxiter=max_iteration, verbosity=3, eager=true); + ) + λ₁ = rl[1] + ξ₁ = rl[2][1] + + r["GHM-Lanczos"].normres += (Fc(ξ₁) - λ₁ .* ξ₁) |> norm + r["GHM-Lanczos"].numops += rl[end].numops + + if true + rl = KrylovKit.linsolve( + Hc, -g, w₀, CG(; tol=εₙ, maxiter=max_iteration, verbosity=3); + ) + r["Newton-CG"].normres += ((hvp(rl[1]) + g) |> norm) + r["Newton-CG"].numops += rl[end].numops + rl = KrylovKit.linsolve( + Hc, -g, w₀, GMRES(; tol=εₙ, maxiter=1, krylovdim=max_iteration, verbosity=3); + ) + r["Newton-GMRES"].normres += ((hvp(rl[1]) + g) |> norm) + r["Newton-GMRES"].numops += rl[end].numops + rl = KrylovKit.linsolve( + Hc, -g, w₀, GMRES(; tol=εₙ, maxiter=4, krylovdim=div(max_iteration, 4), verbosity=3); + ) + r["Newton-rGMRES"].normres += ((hvp(rl[1]) + g) |> norm) + r["Newton-rGMRES"].numops += rl[end].numops + end + ################################ + # ALTERNATIVELY, USE Krylov.jl + ################################ + # opH = LinearOperator( + # Float64, n, n, true, true, + # (y, v) -> hvp!(y, v) + # ) + # d, __unused_info = cg( + # opH, -g; verbose=1, itmax=200 + # ) + + end + for (k, v) in r + push!(table, [name, n, k, v.numops / samples, v.normres / samples]) + end end -γ = 1e-10 -n = Xv[1, :] |> length -Random.seed!(1) -N = y |> length - -Q = Xv' * Xv -function gfs(w) - return (Q * w - Xv' * y) / N + γ * w -end -r = Dict() -# -r["GHM-Lanczos"] = KrylovInfo(normres=0.0, numops=0) -r["Newton-CG"] = KrylovInfo(normres=0.0, numops=0) -r["Newton-GMRES"] = KrylovInfo(normres=0.0, numops=0) -r["Newton-rGMRES"] = KrylovInfo(normres=0.0, numops=0) - -samples = 5 -# for idx in 1:samples -w₀ = rand(Float64, n) -g = gfs(w₀) -δ = -0.1 -ϵᵧ = 1e-5 -function hvp(w) - gn = gfs(w₀ + ϵᵧ .* w) - gf = g - return (gn - gf) / ϵᵧ -end -function hvp!(y, w) - gn = gfs(w₀ + ϵᵧ .* w) - gf = g - return copyto!(y, (gn - gf) / ϵᵧ) -end -Fw(w) = [hvp(w[1:end-1]) + g .* w[end]; w[1:end-1]' * g + δ * w[end]] -Fc = DRSOM.Counting(Fw) -Hc = DRSOM.Counting(hvp) -@info "data reading finished" - -max_iteration = 200 -ε = 1e-6 - -rl = KrylovKit.eigsolve( - Fc, [w₀; 1], 1, :SR, Lanczos(tol=ε / 10, maxiter=max_iteration, verbosity=3, eager=true); -) -λ₁ = rl[1] -ξ₁ = rl[2][1] - -r["GHM-Lanczos"].normres += (Fc(ξ₁) - λ₁ .* ξ₁) |> norm -r["GHM-Lanczos"].numops += rl[end].numops - -if false: - rl = KrylovKit.linsolve( - Hc, -g, w₀, CG(; tol=ε, maxiter=max_iteration, verbosity=3); - ) - r["Newton-CG"].normres += ((hvp(rl[1]) + g) |> norm) - r["Newton-CG"].numops += rl[end].numops - rl = KrylovKit.linsolve( - Hc, -g, w₀, GMRES(; tol=ε, maxiter=1, krylovdim=max_iteration, verbosity=3); - ) - r["Newton-GMRES"].normres += ((hvp(rl[1]) + g) |> norm) - r["Newton-GMRES"].numops += rl[end].numops - rl = KrylovKit.linsolve( - Hc, -g, w₀, GMRES(; tol=ε, maxiter=4, krylovdim=div(max_iteration, 4), verbosity=3); - ) - r["Newton-rGMRES"].normres += ((hvp(rl[1]) + g) |> norm) - r["Newton-rGMRES"].numops += rl[end].numops - -################################ -# ALTERNATIVELY, USE Krylov.jl -################################ -# opH = LinearOperator( -# Float64, n, n, true, true, -# (y, v) -> hvp!(y, v) -# ) -# d, __unused_info = cg( -# opH, -g; verbose=1, itmax=200 -# ) - -# end -for (k, v) in r - push!(table, [name, n, k, v.numops / samples, v.normres / samples]) -end -# end - tmat = hcat(table...) df = DataFrame( name=tmat[1, :], diff --git a/test/convex/test_logistic.jl b/test/convex/test_logistic.jl index a2682ff..fe946cc 100644 --- a/test/convex/test_logistic.jl +++ b/test/convex/test_logistic.jl @@ -39,20 +39,19 @@ using .LP using LoopVectorization using LIBSVMFileIO -bool_opt = true +bool_q_preprocessed = bool_opt = false bool_plot = true -bool_q_preprocessed = true f1(A, d=2) = sqrt.(sum(abs2.(A), dims=d)) -ε = 2e-8 # * max(g(x0) |> norm, 1) -λ = 1e-5 +ε = 1.5e-8 # * max(g(x0) |> norm, 1) +λ = 5e-6 if bool_q_preprocessed # name = "a4a" # name = "a9a" # name = "w4a" # name = "covtype" - name = "news20" # name = "rcv1" + name = "news20" X, y = libsvmread("test/instances/libsvm/$name.libsvm"; dense=false) Xv = hcat(X...)' @@ -85,46 +84,6 @@ if bool_q_preprocessed x0 = 10 * randn(Float64, n) - # function g1(w) - # function _g(x, y, w) - # ff = exp(-y * w' * x) - # return -ff / (1 + ff) * y * x - # end - # _pure = vmapreduce( - # (x, y) -> _g(x, y, w), - # +, - # X, - # y - # ) - # return _pure / N + λ * w - # end - # function hvp1(w, v, Hv; eps=1e-8) - # function _hvp(x, y, q, w, v) - # wx = w' * x - # ff = exp(-y * wx) - # return ff / (1 + ff)^2 * q * x' * v - # end - # _pure = vmapreduce( - # (x, y, q) -> _hvp(x, y, q, w, v), - # +, - # X, - # y, - # P - # ) - # # copyto!(Hv, 1 / eps .* g(w + eps .* v) - 1 / eps .* g(w)) - # copyto!(Hv, _pure ./ N .+ λ .* v) - # end - - # function loss1(w) - # _pure = vmapreduce( - # (x, c) -> log(1 + exp(-c * w' * x)), - # +, - # X, - # y - # ) - # return _pure / N + 0.5 * λ * w'w - # end - function loss(w) z = log.(1 .+ exp.(-y .* (Xv * w))) |> sum return z / N + 0.5 * λ * w'w @@ -178,13 +137,16 @@ if bool_opt time_limit=500 ) - rn1 = PFH(name=Symbol("iNewton"))(; + rn1 = UTR(name=Symbol("iNewton-Grad-1e-5"))(; x0=copy(x0), f=loss, g=g, hvp=hvpdiff, maxiter=300, tol=ε, freq=1, - step=:newton, μ₀=0.0, - maxtime=500, linesearch=:backtrack, bool_trace=true, - direction=:warm + subpstrategy=:lanczos, + mainstrategy=:newton, + initializerule=:unscaled, + adaptiverule=:constant, + σ₀=5e-5, + maxtime=1500 ) rn2 = UTR(name=Symbol("iNewton-Grad-1e-4"))(; @@ -211,14 +173,6 @@ if bool_opt maxtime=1500 ) - ra = HSODM(name=Symbol("Adaptive-HSODM"))(; - x0=copy(x0), f=loss, g=g, hvp=hvpdiff, - maxiter=10000, tol=ε, freq=1, - direction=:warm, linesearch=:hagerzhang, - adaptive=:none, - maxtime=1500 - ) - rh = PFH(name=Symbol("PF-HSODM"))(; x0=copy(x0), f=loss, g=g, hvp=hvpdiff, maxiter=10000, tol=ε, freq=1, @@ -228,11 +182,14 @@ if bool_opt maxtime=500, ) - # ru = UTR(name=Symbol("Universal-TRS"))(; - # x0=copy(x0), f=loss, g=g, H=H, - # maxiter=10000, tol=1e-6, freq=1, - # direction=:warm, subpstrategy=:lanczos - # ) + ra = HSODM(name=Symbol("Adaptive-HSODM"))(; + x0=copy(x0), f=loss, g=g, hvp=hvpdiff, + maxiter=10000, tol=ε, freq=1, + direction=:warm, linesearch=:hagerzhang, + adaptive=:none, + maxtime=1500 + ) + end @@ -248,7 +205,7 @@ if bool_plot method_names = [ L"\texttt{Adaptive-HSODM}", L"\texttt{Homotopy-HSODM}", - L"\texttt{iNewton}", + L"\texttt{iNewton-Grad-1e-5}", L"\texttt{iNewton-Grad-1e-4}", L"\texttt{iNewton-Grad-1e-3}", # L"\texttt{Universal-TR}" diff --git a/test/cutest-benchmark/hsodm_paper_test.jl b/test/cutest-benchmark/hsodm_paper_test.jl index f226d78..5e31255 100644 --- a/test/cutest-benchmark/hsodm_paper_test.jl +++ b/test/cutest-benchmark/hsodm_paper_test.jl @@ -2,6 +2,79 @@ # unconstrained problems 2022/11/04 # I select a set of unconstrained problems TEST = [["EXTROSNB", "N=100"]] + + +UNC_PROBLEM_NO_PARAMS = [ + ["ALLINITU", ""], + ["ARGLINA", ""], + ["BARD", ""], + ["BEALE", ""], + ["BIGGS6", ""], + ["BOX3", ""], + ["BRKMCC", ""], + ["BROWNAL", ""], + ["BROWNBS", ""], + ["BROWNDEN", ""], + ["CHNROSNB", ""], + ["CLIFF", ""], + ["CUBE", ""], + ["DENSCHNA", ""], + ["DENSCHNB", ""], + ["DENSCHNC", ""], + ["DENSCHND", ""], + ["DENSCHNE", ""], + ["DENSCHNF", ""], + ["DJTL", ""], + ["ENGVAL2", ""], + ["ERRINROS", ""], + ["EXPFIT", ""], + ["GENROSEB", ""], + ["GROWTHLS", ""], + ["GULF", ""], + ["HAIRY", ""], + ["HATFLDD", ""], + ["HATFLDE", ""], + ["HEART6LS", ""], + ["HEART8LS", ""], + ["HELIX", ""], + ["HIMMELBB", ""], + ["HUMPS", ""], + ["HYDC20LS", ""], + ["JENSMP", ""], + ["KOWOSB", ""], + ["LOGHAIRY", ""], + ["MANCINO", ""], + ["MEXHAT", ""], + ["MEYER3", ""], + ["OSBORNEA", ""], + ["OSBORNEB", ""], + ["PALMER5C", ""], + ["PALMER6C", ""], + ["PALMER7C", ""], + ["PALMER8C", ""], + ["PARKCH", ""], + ["PENALTY2", ""], + ["PENALTY3", ""], + ["PFIT1LS", ""], + ["PFIT2LS", ""], + ["PFIT3LS", ""], + ["PFIT4LS", ""], + ["ROSENBR", ""], + ["S308", ""], + ["SENSORS", ""], + ["SINEVAL", ""], + ["SISSER", ""], + ["SNAIL", ""], + ["STREG", ""], + ["TOINTGOR", ""], + ["TOINTPSP", ""], + ["VARDIM", ""], + ["VIBRBEAM", ""], + ["WATSON", ""], + ["YFITU", ""] +] + + UNC_PROBLEMS_4to200 = [ ["ARGLINA", "M=200,N=200"], ["ARGLINB", "M=200,N=200"], diff --git a/test/cutest-benchmark/test_cutest_batch.jl b/test/cutest-benchmark/test_cutest_batch.jl index 4016e2a..611dc64 100644 --- a/test/cutest-benchmark/test_cutest_batch.jl +++ b/test/cutest-benchmark/test_cutest_batch.jl @@ -72,7 +72,8 @@ for (f, param_combination) in PROBLEMS r = v(x0, loss, g, H, options_drsom; hvp=hvp) line = [ precision, nlp.meta.name, "\"$pc\"", nlp.meta.nvar, k, - r.k, r.state.kf, r.state.kg + r.state.kh, r.state.kH, + # r.k, r.state.kf, r.state.kg + r.state.kh, r.state.kH, + r.k, neval_obj(nlp), neval_grad(nlp) + neval_hprod(nlp), neval_hess(nlp), r.state.ϵ, r.state.fx, r.state.t, r.state.ϵ <= this_tol ] diff --git a/test/cutest-benchmark/test_setup.jl b/test/cutest-benchmark/test_setup.jl index a39c7e1..2ea2d9a 100644 --- a/test/cutest-benchmark/test_setup.jl +++ b/test/cutest-benchmark/test_setup.jl @@ -33,8 +33,8 @@ filter_cutest_problem(nlp) = true # filter_optimization_method(k) = k ∈ [:LBFGS, :HSODM] # filter_optimization_method(k) = k ∈ [:DRSOM, :DRSOMHomo] # filter_optimization_method(k) = k == :HSODM -# filter_optimization_method(k) = k == :HSODMhvp -filter_optimization_method(k) = k ∈ [:HSODMhvp, :HSODM] +filter_optimization_method(k) = k == :HSODMhvp +# filter_optimization_method(k) = k ∈ [:HSODMhvp, :ARC, :TRST] # filter_optimization_method(k) = k ∈ [:iUTRhvp] # filter_optimization_method(k) = k == :ARC # filter_optimization_method(k) = k == :TRST @@ -50,7 +50,8 @@ filter_optimization_method(k) = k ∈ [:HSODMhvp, :HSODM] # PROBLEMS = UNC_PROBLEMS_4to200 # PROBLEMS = UNC_PROBLEMS_200to5000 # PROBLEMS = UNC_PROBLEMS_GOOD -PROBLEMS = UNC_PROBLEMS_COMB +PROBLEMS = UNC_PROBLEMS_COMB[155:end] +# PROBLEMS = UNC_PROBLEM_NO_PARAMS if test_before_start ###################################################################### diff --git a/test/tools.jl b/test/tools.jl index 757f637..2ec6291 100644 --- a/test/tools.jl +++ b/test/tools.jl @@ -92,183 +92,193 @@ end export StateOptim, optim_to_result, arc_to_result, arc_stop_to_result +add_optim = true +add_adaptive_reg_jl = true +if add_optim + wrapper_gd(x, loss, g, H, options; kwargs...) = + optim_to_result( + Optim.optimize( + loss, g, x, + GradientDescent(; + alphaguess=LineSearches.InitialStatic(), + linesearch=LineSearches.HagerZhang() + ), + options; + inplace=false + ), "GD+Wolfe" + ) - -wrapper_gd(x, loss, g, H, options; kwargs...) = - optim_to_result( - Optim.optimize( - loss, g, x, - GradientDescent(; - alphaguess=LineSearches.InitialStatic(), - linesearch=LineSearches.HagerZhang() - ), - options; - inplace=false - ), "GD+Wolfe" - ) - -wrapper_cg(x, loss, g, H, options; kwargs...) = - optim_to_result( - Optim.optimize( - loss, g, x, - ConjugateGradient(; - alphaguess=LineSearches.InitialStatic(), - linesearch=LineSearches.HagerZhang() - ), - options; - inplace=false - ), "CG" - ) -wrapper_lbfgs(x, loss, g, H, options; kwargs...) = - optim_to_result( - Optim.optimize( - loss, g, x, - LBFGS(; - m=10, - alphaguess=LineSearches.InitialStatic(), - linesearch=LineSearches.HagerZhang() - ), - options; - inplace=false - ), "LBFGS+Wolfe" - ) -wrapper_newton(x, loss, g, H, options; kwargs...) = - optim_to_result( - Optim.optimize( - loss, g, H, x, - NewtonTrustRegion(), - options; - inplace=false - ), "Newton+TR" - ) -function wrapper_arc(nlp) - reset!(nlp) - stats = ARCqKOp( - nlp, - max_time=max_time, - max_iter=max_iter, - max_eval=typemax(Int64), - verbose=true - # atol=atol, - # rtol=rtol, - # @note: how to set |g|? - ) - # AdaptiveRegularization.jl to my style of results - return arc_to_result(nlp, stats, "ARC") + wrapper_cg(x, loss, g, H, options; kwargs...) = + optim_to_result( + Optim.optimize( + loss, g, x, + ConjugateGradient(; + alphaguess=LineSearches.InitialStatic(), + linesearch=LineSearches.HagerZhang() + ), + options; + inplace=false + ), "CG" + ) + wrapper_lbfgs(x, loss, g, H, options; kwargs...) = + optim_to_result( + Optim.optimize( + loss, g, x, + LBFGS(; + m=10, + alphaguess=LineSearches.InitialStatic(), + linesearch=LineSearches.HagerZhang() + ), + options; + inplace=false + ), "LBFGS+Wolfe" + ) + wrapper_newton(x, loss, g, H, options; kwargs...) = + optim_to_result( + Optim.optimize( + loss, g, H, x, + NewtonTrustRegion(), + options; + inplace=false + ), "Newton+TR" + ) end +if add_adaptive_reg_jl + function wrapper_arc(nlp) + reset!(nlp) + stats, _ = ARCqKOp( + nlp, + max_time=max_time, + max_iter=max_iter, + max_eval=typemax(Int64), + verbose=true + # atol=atol, + # rtol=rtol, + # @note: how to set |g|? + ) + # AdaptiveRegularization.jl to my style of results + return arc_to_result(nlp, stats, "ARC") + end -function wrapper_tr_st(nlp) - reset!(nlp) - stats = ST_TROp( - nlp, - max_time=max_time, - max_iter=max_iter, - max_eval=typemax(Int64), - verbose=true - # atol=atol, - # rtol=rtol, - # @note: how to set |g|? - ) - # AdaptiveRegularization.jl to my style of results - return arc_to_result(nlp, stats, "TRST") + function wrapper_tr_st(nlp) + reset!(nlp) + stats, _ = ST_TROp( + nlp, + max_time=max_time, + max_iter=max_iter, + max_eval=typemax(Int64), + verbose=true + # atol=atol, + # rtol=rtol, + # @note: how to set |g|? + ) + # AdaptiveRegularization.jl to my style of results + return arc_to_result(nlp, stats, "TRST") + end end - ########################################################## # MY VARIANTS ########################################################## +add_drsom = true +add_hsodm = true +add_utr = true +if add_drsom + alg_drsom = DRSOM2() + wrapper_drsom(x, loss, g, H, options; kwargs...) = + alg_drsom(; + x0=copy(x), f=loss, g=g, H=H, + fog=:direct, + sog=:hess, + options... + ) + wrapper_drsomh(x, loss, g, H, options; kwargs...) = + alg_drsom(; + x0=copy(x), f=loss, g=g, H=H, + fog=:direct, + sog=:hess, + options... + ) + wrapper_drsomf(x, loss, g, H, options; kwargs...) = + alg_drsom(; + x0=copy(x), f=loss, + fog=:forward, + sog=:forward, + options... + ) + wrapper_drsomb(x, loss, g, H, options; kwargs...) = + alg_drsom(; + x0=copy(x), f=loss, + fog=:backward, + sog=:backward, + options... + ) + wrapper_drsomd(x, loss, g, H, options; kwargs...) = + alg_drsom(; + x0=copy(x), f=loss, g=g, + fog=:direct, + sog=:prov, + kwargs..., + options... + ) +end +if add_hsodm + alg_hsodm = HSODM() + wrapper_hsodm(x, loss, g, H, options; kwargs...) = + alg_hsodm(; + x0=copy(x), f=loss, g=g, H=H, + linesearch=:hagerzhang, + direction=:warm, + adaptive=:mishchenko, + options... + ) + alg_hsodm_hvp = HSODM() + wrapper_hsodm_hvp(x, loss, g, H, options; kwargs...) = + alg_hsodm_hvp(; + x0=copy(x), f=loss, g=g, + linesearch=:hagerzhang, + # linesearch=:backtrack, + direction=:warm, + adaptive=:mishchenko, + kwargs..., + options... + ) + alg_hsodm_arc = HSODM(; name=:HSODMA) + wrapper_hsodm_arc(x, loss, g, H, options; kwargs...) = + alg_hsodm_arc(; + x0=copy(x), f=loss, g=g, H=H, + linesearch=:none, + direction=:warm, + adaptive=:arc, + options... + ) +end +if add_utr + alg_utr = UTR(; name=:UTR) + wrapper_utr(x, loss, g, H, options; kwargs...) = + alg_utr(; + x0=copy(x), f=loss, g=g, H=H, + subpstrategy=:direct, + options... + ) -alg_drsom = DRSOM2() -wrapper_drsom(x, loss, g, H, options; kwargs...) = - alg_drsom(; - x0=copy(x), f=loss, g=g, H=H, - fog=:direct, - sog=:hess, - options... - ) -wrapper_drsomh(x, loss, g, H, options; kwargs...) = - alg_drsom(; - x0=copy(x), f=loss, g=g, H=H, - fog=:direct, - sog=:hess, - options... - ) -wrapper_drsomf(x, loss, g, H, options; kwargs...) = - alg_drsom(; - x0=copy(x), f=loss, - fog=:forward, - sog=:forward, - options... - ) -wrapper_drsomb(x, loss, g, H, options; kwargs...) = - alg_drsom(; - x0=copy(x), f=loss, - fog=:backward, - sog=:backward, - options... - ) -wrapper_drsomd(x, loss, g, H, options; kwargs...) = - alg_drsom(; - x0=copy(x), f=loss, g=g, - fog=:direct, - sog=:prov, - kwargs..., - options... - ) - -alg_hsodm = HSODM() -wrapper_hsodm(x, loss, g, H, options; kwargs...) = - alg_hsodm(; - x0=copy(x), f=loss, g=g, H=H, - linesearch=:hagerzhang, - direction=:warm, - adaptive=:mishchenko, - options... - ) -alg_hsodm_hvp = HSODM() -wrapper_hsodm_hvp(x, loss, g, H, options; kwargs...) = - alg_hsodm_hvp(; - x0=copy(x), f=loss, g=g, - linesearch=:hagerzhang, - direction=:warm, - adaptive=:mishchenko, - kwargs..., - options... - ) -alg_hsodm_arc = HSODM(; name=:HSODMArC) -wrapper_hsodm_arc(x, loss, g, H, options; kwargs...) = - alg_hsodm_arc(; - x0=copy(x), f=loss, g=g, H=H, - linesearch=:none, - direction=:warm, - adaptive=:arc, - options... - ) -alg_utr = UTR(; name=:UTR) -wrapper_utr(x, loss, g, H, options; kwargs...) = - alg_utr(; - x0=copy(x), f=loss, g=g, H=H, - subpstrategy=:direct, - options... - ) - -wrapper_iutr(x, loss, g, H, options; kwargs...) = - alg_utr(; - x0=copy(x), f=loss, g=g, H=H, - subpstrategy=:lanczos, - options... - ) - -wrapper_iutr_hvp(x, loss, g, H, options; kwargs...) = - alg_utr(; - x0=copy(x), f=loss, g=g, - subpstrategy=:lanczos, - kwargs..., - options... - ) + wrapper_iutr(x, loss, g, H, options; kwargs...) = + alg_utr(; + x0=copy(x), f=loss, g=g, H=H, + subpstrategy=:lanczos, + options... + ) + wrapper_iutr_hvp(x, loss, g, H, options; kwargs...) = + alg_utr(; + x0=copy(x), f=loss, g=g, + subpstrategy=:lanczos, + kwargs..., + options... + ) +end # My solvers and those in Optim.jl From fdfd3dccd9fa42269dbd7b277e6b3f4f00b6327a Mon Sep 17 00:00:00 2001 From: cz Date: Sun, 7 Apr 2024 08:55:57 +0800 Subject: [PATCH 12/17] ss --- src/utilities/homogeneous.jl | 95 +++++++++++++---------------- test/convex/test_logistic.jl | 8 +-- test/cutest-benchmark/test_setup.jl | 3 +- 3 files changed, 47 insertions(+), 59 deletions(-) diff --git a/src/utilities/homogeneous.jl b/src/utilities/homogeneous.jl index f644ef2..8798de4 100644 --- a/src/utilities/homogeneous.jl +++ b/src/utilities/homogeneous.jl @@ -206,7 +206,7 @@ function _inner_homogeneous_eigenvalue( B::Symmetric{Q,F}, iter, state ) where {Q<:Real,F<:Union{SparseMatrixCSC{Float64,Int64},Matrix{Float64}}} n = length(state.x) - vals, vecs, info = eigenvalue(B, iter, state) + vals, vecs, info = _eigenvalue(B, iter, state) λ₁ = vals |> real ξ = vecs[:, 1] |> real @@ -229,7 +229,7 @@ end function _inner_homogeneous_eigenvalue(f::Function, iter, state) n = length(state.x) - vals, vecs, info = eigenvalue(f, iter, state) + vals, vecs, info = _eigenvalue(f, iter, state) λ₁ = vals[1] ξ = vecs[1] @@ -254,13 +254,14 @@ end homogeneous_eigenvalue = Counting(_inner_homogeneous_eigenvalue) -function eigenvalue( +function _eigenvalue( B::Symmetric{Float64,F}, iter, state; bg=:arnoldi ) where {F<:Union{SparseMatrixCSC{Float64,Int64},Matrix{Float64}}} n = length(state.x) - _reltol = state.ϵ > 1e-4 ? 1e-5 : iter.eigtol - _tol = _reltol # * state.ϵ + # _reltol = state.ϵ > 1e-4 ? 1e-5 : iter.eigtol + # _tol = _reltol * state.ϵ + _tol = state.ϵ > 1e-3 ? 1e-5 : max(iter.eigtol, 1e-3 * state.ϵ) if bg == :krylov if iter.direction == :cold vals, vecs, info = KrylovKit.eigsolve(B, n + 1, 1, :SR, Float64; tol=_tol, issymmetric=true, eager=true) @@ -283,8 +284,8 @@ function eigenvalue( end -function eigenvalue(f::Function, iter, state; bg=:krylov) - _tol = (state.ϵ > 1e-4 ? 1e-5 : iter.eigtol) # * state.ϵ +function _eigenvalue(f::Function, iter, state; bg=:krylov) + _tol = state.ϵ > 1e-3 ? 1e-5 : max(iter.eigtol, 1e-3 * state.ϵ) if bg == :krylov if iter.direction == :cold n = length(state.x) @@ -333,61 +334,47 @@ function NewtonStep(iter::I, μ, g, state; verbose::Bool=false n = g |> length gn = (g |> norm) - opH = LinearOperator(Float64, n, n, true, true, (y, v) -> iter.ff(y, v)) - d, _info = cg( - opH, -Vector(g); - # rtol=state.ϵ > 1e-4 ? 1e-7 : iter.eigtol, - rtol=state.ϵ > 1e-4 ? gn * 1e-4 : gn * 1e-6, - itmax=200, - verbose=verbose ? 3 : 0 - ) - - return _info.niter, 1, d, norm(d), d' * state.∇f, d' * state.∇fb - # f(v) = iter.ff(state.∇fb, v) - # d, _info = KrylovKit.linsolve( - # f, -Vector(g), -Vector(g), - # # isposdef=true, - # # issymmetric=true, + # opH = LinearOperator(Float64, n, n, true, true, (y, v) -> iter.ff(y, v)) + # d, _info = cg( + # opH, -Vector(g); # # rtol=state.ϵ > 1e-4 ? 1e-7 : iter.eigtol, - # # maxiter=200, - # # verbosity=3 - # CG(; - # # rtol=state.ϵ > 1e-4 ? 1e-7 : iter.eigtol, - # tol=1e-4 * (g |> norm), - # maxiter=n * 2, - # verbosity=verbose ? 3 : 0 - # ), + # rtol=state.ϵ > 1e-4 ? gn * 1e-4 : gn * 1e-6, + # itmax=200, + # verbose=verbose ? 3 : 0 # ) - # return _info.numops, 1, d, norm(d), d' * state.∇f, d' * state.∇fb + # return _info.niter, 1, d, norm(d), d' * state.∇f, d' * state.∇fb + f(v) = iter.ff(state.∇fb, v) + d, _info = KrylovKit.linsolve( + f, -Vector(g), -Vector(g), + CG(; + tol=min(gn, 1e-4), + maxiter=n * 2, + verbosity=verbose ? 3 : 0 + ), + ) + return _info.numops, 1, d, norm(d), d' * state.∇f, d' * state.∇fb end function NewtonStep(opH::LinearOperator, g, state; verbose::Bool=false) - @debug "started Newton-step @" Dates.now() + # @debug "started Newton-step @" Dates.now() n = g |> length gn = (g |> norm) - d, _info = cg( - opH, -Vector(g); - rtol=state.ϵ > 1e-4 ? gn * 1e-4 : gn * 1e-6, - itmax=200, - verbose=verbose ? 3 : 0 - ) - - return _info.niter, 1, d, norm(d), d' * state.∇f, d' * state.∇fb - # f(v) = iter.ff(state.∇fb, v) - # d, _info = KrylovKit.linsolve( - # f, -Vector(g), -Vector(g), - # # isposdef=true, - # # issymmetric=true, - # # rtol=state.ϵ > 1e-4 ? 1e-7 : iter.eigtol, - # # maxiter=200, - # # verbosity=3 - # CG(; - # # rtol=state.ϵ > 1e-4 ? 1e-7 : iter.eigtol, - # tol=1e-4*(g|>norm), - # maxiter=n*2, - # verbosity=3 - # ), + # d, _info = cg( + # opH, -Vector(g); + # rtol=state.ϵ > 1e-4 ? gn * 1e-4 : gn * 1e-6, + # itmax=200, + # verbose=verbose ? 3 : 0 # ) + # return _info.niter, 1, d, norm(d), d' * state.∇f, d' * state.∇fb + f(v) = iter.ff(state.∇fb, v) + d, _info = KrylovKit.linsolve( + f, -Vector(g), -Vector(g), + CG(; + tol=min(gn, 1e-4), + maxiter=n * 2, + verbosity=verbose ? 3 : 0 + ), + ) end diff --git a/test/convex/test_logistic.jl b/test/convex/test_logistic.jl index fe946cc..b095c84 100644 --- a/test/convex/test_logistic.jl +++ b/test/convex/test_logistic.jl @@ -39,19 +39,19 @@ using .LP using LoopVectorization using LIBSVMFileIO -bool_q_preprocessed = bool_opt = false +bool_q_preprocessed = bool_opt = true bool_plot = true f1(A, d=2) = sqrt.(sum(abs2.(A), dims=d)) ε = 1.5e-8 # * max(g(x0) |> norm, 1) -λ = 5e-6 +λ = 1e-6 if bool_q_preprocessed # name = "a4a" # name = "a9a" # name = "w4a" # name = "covtype" - # name = "rcv1" - name = "news20" + name = "rcv1" + # name = "news20" X, y = libsvmread("test/instances/libsvm/$name.libsvm"; dense=false) Xv = hcat(X...)' diff --git a/test/cutest-benchmark/test_setup.jl b/test/cutest-benchmark/test_setup.jl index 2ea2d9a..988a73f 100644 --- a/test/cutest-benchmark/test_setup.jl +++ b/test/cutest-benchmark/test_setup.jl @@ -50,7 +50,8 @@ filter_optimization_method(k) = k == :HSODMhvp # PROBLEMS = UNC_PROBLEMS_4to200 # PROBLEMS = UNC_PROBLEMS_200to5000 # PROBLEMS = UNC_PROBLEMS_GOOD -PROBLEMS = UNC_PROBLEMS_COMB[155:end] +# PROBLEMS = UNC_PROBLEMS_COMB[155:end] +PROBLEMS = UNC_PROBLEMS_COMB # PROBLEMS = UNC_PROBLEM_NO_PARAMS if test_before_start From 01b4b50dea5bd249327edbe4868c04c3ba2cefd6 Mon Sep 17 00:00:00 2001 From: C Zhang Date: Mon, 15 Apr 2024 22:08:11 -0400 Subject: [PATCH 13/17] update --- test/Project.toml | 1 + test/convex/test_hilbert.jl | 33 ++++++++++++++++++++++++--------- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/test/Project.toml b/test/Project.toml index 8254d76..469510f 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -5,6 +5,7 @@ ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" CUTEst = "1b53aba6-35b6-5f92-a507-53c67d53f819" +CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" DRSOM = "d175e27a-564a-485c-86b6-52e0284acaa2" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" diff --git a/test/convex/test_hilbert.jl b/test/convex/test_hilbert.jl index e97e6e5..cd03366 100644 --- a/test/convex/test_hilbert.jl +++ b/test/convex/test_hilbert.jl @@ -32,6 +32,8 @@ using LIBSVMFileIO using DataFrames using CSV using SpecialMatrices +using CategoricalArrays + Base.@kwdef mutable struct KrylovInfo normres::Float64 numops::Float64 @@ -52,7 +54,7 @@ for δ in [1e-5, 1e-7, 1e-9, 1e-10] r = Dict() # - r["GHM (Lanczos)"] = KrylovInfo(normres=0.0, numops=0) + r["Lanczos (GHM)"] = KrylovInfo(normres=0.0, numops=0) r["CG"] = KrylovInfo(normres=0.0, numops=0) r["GMRES"] = KrylovInfo(normres=0.0, numops=0) r["rGMRES"] = KrylovInfo(normres=0.0, numops=0) @@ -77,8 +79,8 @@ for δ in [1e-5, 1e-7, 1e-9, 1e-10] λ₁ = rl[1] ξ₁ = rl[2][1] - r["GHM (Lanczos)"].normres += (Fc(ξ₁) - λ₁ .* ξ₁) |> norm - r["GHM (Lanczos)"].numops += rl[end].numops + r["Lanczos (GHM)"].normres += (Fc(ξ₁) - λ₁ .* ξ₁) |> norm + r["Lanczos (GHM)"].numops += rl[end].numops rl = KrylovKit.linsolve( Hc, -g, w₀, CG(; tol=εₙ, maxiter=max_iteration, verbosity=3); @@ -108,7 +110,7 @@ kappa = [@sprintf "%0.1e" k for k in tmat[2, :]] df = DataFrame( delta=[L"$\delta=$%$k" for k in delta], - kappa=[L"$\kappa_H=$%$k" for k in kappa], + kappa=["$k" for k in kappa], method=tmat[3, :], k=tmat[4, :], ϵ=tmat[5, :] @@ -123,20 +125,33 @@ print(df.set_index(["delta", "kappa", "method"]).to_latex(multirow=True, longtab """ using StatsPlots + + +function Base.unique(ctg::CategoricalArray) + l = levels(ctg) + newctg = CategoricalArray(l) + levels!(newctg, l) +end + +ctg = CategoricalArray(df.method) +levels!(ctg, ["Lanczos (GHM)", "GMRES", "rGMRES", "CG"]) + + pgfplotsx() fig = groupedbar( - df.method, + ctg, convert(Vector{Float64}, df.k), bar_position=:dodge, group=df.kappa, palette=:Paired_8, - # xlabel="Method", - leg=:topright, + leg=:topleft, legendfontsize=14, labelfontsize=14, - ylabel=L"Krylov Iterations: $K$" + ylabel=L"$K$: Krylov Iterations", + ytickfont=font(12), + xtickfont=font(12), ) savefig(fig, "/tmp/hilbert.tex") savefig(fig, "/tmp/hilbert.pdf") -savefig(fig, "/tmp/hilbert.png") \ No newline at end of file +# savefig(fig, "/tmp/hilbert.png") From 8bc2bbfc53771191e1d7d8d6998f425fe85285d4 Mon Sep 17 00:00:00 2001 From: cz Date: Tue, 16 Apr 2024 20:50:45 +0800 Subject: [PATCH 14/17] add cold skewed start --- src/algorithms/hsodm.jl | 1 + src/utilities/homogeneous.jl | 13 +++++++++---- src/utilities/trustregion.jl | 2 +- test/tools.jl | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/algorithms/hsodm.jl b/src/algorithms/hsodm.jl index 42b4ec2..0451dbd 100644 --- a/src/algorithms/hsodm.jl +++ b/src/algorithms/hsodm.jl @@ -333,6 +333,7 @@ function Base.show(io::IO, t::T) where {T<:HSODMIteration} @printf io " inner iteration limit := %s\n" t.itermax @printf io " line-search algorithm := %s\n" t.linesearch @printf io " adaptive strategy := %s\n" t.adaptive + @printf io " eigenvalue init := %s\n" t.direction if t.hvp !== nothing @printf io " second-order info := using provided Hessian-vector product\n" elseif t.H !== nothing diff --git a/src/utilities/homogeneous.jl b/src/utilities/homogeneous.jl index 8798de4..748860c 100644 --- a/src/utilities/homogeneous.jl +++ b/src/utilities/homogeneous.jl @@ -261,10 +261,13 @@ function _eigenvalue( n = length(state.x) # _reltol = state.ϵ > 1e-4 ? 1e-5 : iter.eigtol # _tol = _reltol * state.ϵ - _tol = state.ϵ > 1e-3 ? 1e-5 : max(iter.eigtol, 1e-3 * state.ϵ) + _tol = state.ϵ > 1e-3 ? 1e-5 : max(iter.eigtol, 1e-2 * state.ϵ) if bg == :krylov if iter.direction == :cold - vals, vecs, info = KrylovKit.eigsolve(B, n + 1, 1, :SR, Float64; tol=_tol, issymmetric=true, eager=true) + q = zeros(n+1) + q[end]=1.0 + # vals, vecs, info = KrylovKit.eigsolve(B, n + 1, 1, :SR, Float64; tol=_tol, issymmetric=true, eager=true) + vals, vecs, info = KrylovKit.eigsolve(B, q, 1, :SR; tol=_tol, issymmetric=true, eager=true) else vals, vecs, info = KrylovKit.eigsolve(B, state.ξ, 1, :SR; tol=_tol, issymmetric=true, eager=true) end @@ -285,11 +288,13 @@ end function _eigenvalue(f::Function, iter, state; bg=:krylov) - _tol = state.ϵ > 1e-3 ? 1e-5 : max(iter.eigtol, 1e-3 * state.ϵ) + _tol = state.ϵ > 1e-3 ? 1e-5 : max(iter.eigtol, 1e-2 * state.ϵ) if bg == :krylov if iter.direction == :cold n = length(state.x) - vals, vecs, info = KrylovKit.eigsolve(f, n + 1, 1, :SR, Float64; tol=_tol, issymmetric=true, eager=true) + q = zeros(n+1) + q[end]=1.0 + vals, vecs, info = KrylovKit.eigsolve(f, q, 1, :SR; tol=_tol, issymmetric=true, eager=true) else vals, vecs, info = KrylovKit.eigsolve(f, state.ξ, 1, :SR; tol=_tol, issymmetric=true, eager=true) end diff --git a/src/utilities/trustregion.jl b/src/utilities/trustregion.jl index 8f386b1..ab3eec3 100644 --- a/src/utilities/trustregion.jl +++ b/src/utilities/trustregion.jl @@ -14,7 +14,7 @@ const lsa::HagerZhangEx = HagerZhangEx( const lsb::BackTrackingEx = BackTrackingEx( ρ_hi=0.8, ρ_lo=0.1, - order=2 + order=3 ) @doc raw""" diff --git a/test/tools.jl b/test/tools.jl index 2ec6291..c348cba 100644 --- a/test/tools.jl +++ b/test/tools.jl @@ -240,7 +240,7 @@ if add_hsodm x0=copy(x), f=loss, g=g, linesearch=:hagerzhang, # linesearch=:backtrack, - direction=:warm, + direction=:cold, adaptive=:mishchenko, kwargs..., options... From 384446446851c3ef3fa220a221b1c32958dd8254 Mon Sep 17 00:00:00 2001 From: C Zhang Date: Tue, 16 Apr 2024 09:27:47 -0400 Subject: [PATCH 15/17] reformat --- src/utilities/homogeneous.jl | 29 +++++++++++++++++++++-------- src/utilities/linesearches.jl | 11 +++++++++++ src/utilities/trustregion.jl | 11 ----------- test/tools.jl | 2 +- 4 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/utilities/homogeneous.jl b/src/utilities/homogeneous.jl index 748860c..1a8de48 100644 --- a/src/utilities/homogeneous.jl +++ b/src/utilities/homogeneous.jl @@ -264,8 +264,8 @@ function _eigenvalue( _tol = state.ϵ > 1e-3 ? 1e-5 : max(iter.eigtol, 1e-2 * state.ϵ) if bg == :krylov if iter.direction == :cold - q = zeros(n+1) - q[end]=1.0 + q = zeros(n + 1) + q[end] = 1.0 # vals, vecs, info = KrylovKit.eigsolve(B, n + 1, 1, :SR, Float64; tol=_tol, issymmetric=true, eager=true) vals, vecs, info = KrylovKit.eigsolve(B, q, 1, :SR; tol=_tol, issymmetric=true, eager=true) else @@ -288,14 +288,25 @@ end function _eigenvalue(f::Function, iter, state; bg=:krylov) - _tol = state.ϵ > 1e-3 ? 1e-5 : max(iter.eigtol, 1e-2 * state.ϵ) + _tol = state.ϵ > 1e-3 ? 1e-5 : max(iter.eigtol, 1e-1 * state.ϵ) if bg == :krylov + n = length(state.x) if iter.direction == :cold - n = length(state.x) - q = zeros(n+1) - q[end]=1.0 + vals, vecs, info = KrylovKit.eigsolve(f, n + 1, 1, :SR; tol=_tol, issymmetric=true, eager=true) + elseif iter.direction == :skewed + q = zeros(n + 1) + q[end] = 1.0 vals, vecs, info = KrylovKit.eigsolve(f, q, 1, :SR; tol=_tol, issymmetric=true, eager=true) - else + elseif iter.direction == :warm + vals, vecs, info = KrylovKit.eigsolve(f, state.ξ, 1, :SR; tol=_tol, issymmetric=true, eager=true) + elseif iter.direction == :auto + q = zeros(n + 1) + if state.α < 1 + q = state.ξ + else + # q[1:end-1] = (rand(Float64, n) |> normalize) * 0.05 + q[end] = 1.0 + end vals, vecs, info = KrylovKit.eigsolve(f, state.ξ, 1, :SR; tol=_tol, issymmetric=true, eager=true) end end @@ -303,7 +314,9 @@ function _eigenvalue(f::Function, iter, state; bg=:krylov) end - +################################################################################ +# NEWTON STEPS +################################################################################ function NewtonStep( H::SparseMatrixCSC{R,T}, μ, g, state; verbose::Bool=false ) where {R<:Real,T<:Int} diff --git a/src/utilities/linesearches.jl b/src/utilities/linesearches.jl index 78f8f43..17f73b5 100644 --- a/src/utilities/linesearches.jl +++ b/src/utilities/linesearches.jl @@ -1,3 +1,14 @@ +include("./hagerzhang.jl") +include("./backtracking.jl") + +const lsa::HagerZhangEx = HagerZhangEx( +# linesearchmax=10 +) +const lsb::BackTrackingEx = BackTrackingEx( + ρ_hi=0.8, + ρ_lo=0.1, + order=3 +) function TRStyleLineSearch( iter::IterationType, diff --git a/src/utilities/trustregion.jl b/src/utilities/trustregion.jl index ab3eec3..4d797d9 100644 --- a/src/utilities/trustregion.jl +++ b/src/utilities/trustregion.jl @@ -5,17 +5,6 @@ using Dates using KrylovKit using Distributions -include("./hagerzhang.jl") -include("./backtracking.jl") - -const lsa::HagerZhangEx = HagerZhangEx( -# linesearchmax=10 -) -const lsb::BackTrackingEx = BackTrackingEx( - ρ_hi=0.8, - ρ_lo=0.1, - order=3 -) @doc raw""" A simple procedure to solve TRS via a given regularizer diff --git a/test/tools.jl b/test/tools.jl index c348cba..f882d82 100644 --- a/test/tools.jl +++ b/test/tools.jl @@ -240,7 +240,7 @@ if add_hsodm x0=copy(x), f=loss, g=g, linesearch=:hagerzhang, # linesearch=:backtrack, - direction=:cold, + direction=:auto, adaptive=:mishchenko, kwargs..., options... From c28892e09f10442a25ac6a04c2d905160391cc9e Mon Sep 17 00:00:00 2001 From: cz Date: Wed, 17 Apr 2024 01:14:38 +0800 Subject: [PATCH 16/17] fix bugs --- src/utilities/homogeneous.jl | 4 ++-- test/cutest-benchmark/test_setup.jl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/utilities/homogeneous.jl b/src/utilities/homogeneous.jl index 1a8de48..61bf8d3 100644 --- a/src/utilities/homogeneous.jl +++ b/src/utilities/homogeneous.jl @@ -288,7 +288,7 @@ end function _eigenvalue(f::Function, iter, state; bg=:krylov) - _tol = state.ϵ > 1e-3 ? 1e-5 : max(iter.eigtol, 1e-1 * state.ϵ) + _tol = state.ϵ > 1e-3 ? 1e-5 : max(iter.eigtol, 1e-2 * state.ϵ) if bg == :krylov n = length(state.x) if iter.direction == :cold @@ -301,7 +301,7 @@ function _eigenvalue(f::Function, iter, state; bg=:krylov) vals, vecs, info = KrylovKit.eigsolve(f, state.ξ, 1, :SR; tol=_tol, issymmetric=true, eager=true) elseif iter.direction == :auto q = zeros(n + 1) - if state.α < 1 + if state.α < 1e-3 q = state.ξ else # q[1:end-1] = (rand(Float64, n) |> normalize) * 0.05 diff --git a/test/cutest-benchmark/test_setup.jl b/test/cutest-benchmark/test_setup.jl index 988a73f..0ccb723 100644 --- a/test/cutest-benchmark/test_setup.jl +++ b/test/cutest-benchmark/test_setup.jl @@ -49,9 +49,9 @@ filter_optimization_method(k) = k == :HSODMhvp # PROBLEMS = TEST # PROBLEMS = UNC_PROBLEMS_4to200 # PROBLEMS = UNC_PROBLEMS_200to5000 -# PROBLEMS = UNC_PROBLEMS_GOOD +PROBLEMS = UNC_PROBLEMS_GOOD # PROBLEMS = UNC_PROBLEMS_COMB[155:end] -PROBLEMS = UNC_PROBLEMS_COMB +# PROBLEMS = UNC_PROBLEMS_COMB # PROBLEMS = UNC_PROBLEM_NO_PARAMS if test_before_start From 2fc961a1f5caa1e6f25f2104087017e14e3d649a Mon Sep 17 00:00:00 2001 From: C Zhang Date: Thu, 25 Apr 2024 07:45:49 -0400 Subject: [PATCH 17/17] update linsys --- test/convex/test_linear_system.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/convex/test_linear_system.jl b/test/convex/test_linear_system.jl index de9ec7a..720a059 100644 --- a/test/convex/test_linear_system.jl +++ b/test/convex/test_linear_system.jl @@ -37,7 +37,6 @@ Base.@kwdef mutable struct KrylovInfo numops::Float64 end table = [] -name = "a4a" # name = "a9a" # name = "w4a" # name = "covtype" @@ -45,7 +44,8 @@ name = "a4a" # name = "rcv1" # names = ["a4a"] #, "a9a", "w4a", "covtype", "rcv1", "news20"] # names = ["covtype"] #, "a9a", "w4a", "covtype", "rcv1", "news20"] -names = ["a4a", "a9a", "w4a", "covtype", "rcv1"] +names = ["news20"] #, "a9a", "w4a", "covtype", "rcv1", "news20"] +# names = ["a4a", "a9a", "w4a", "covtype", "rcv1"] @warn "news20 is very big..., consider run on a server" # use the data matrix of libsvm f1(A, d=2) = sqrt.(sum(abs2.(A), dims=d))