add new utr test

bzhangcw · May 12, 2024 · 55c6146 · 55c6146
1 parent 7af3d47
commit 55c6146
Show file tree

Hide file tree

Showing 4 changed files with 189 additions and 0 deletions.
diff --git a/test/Project.toml b/test/Project.toml
@@ -11,6 +11,7 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
+JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7"
 KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
 LDLFactorizations = "40e66cde-538c-5869-a4ad-c39174c6795b"
@@ -23,6 +24,7 @@ LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
 LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
 MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2"
 NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6"
+NLPModelsJuMP = "792afdf1-32c1-5681-94e0-d7bf7a5df49e"
 OhMyREPL = "5fb14364-9ced-5910-84b2-373655c76a03"
 Optim = "429524aa-4258-5aef-a3af-852621145aeb"
 PGFPlotsX = "8314cec4-20b6-5062-9cdb-752b83310925"
@@ -35,6 +37,7 @@ ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
 Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
 SpecialMatrices = "928aab9d-ef52-54ac-8ca1-acd7ca42c160"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
 Stopping = "c4fe5a9e-e7fb-5c3d-89d5-7f405ab2214f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

diff --git a/test/ncvx/README.md b/test/ncvx/README.md
@@ -0,0 +1,4 @@
+# Note
+
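+- The test script is adapted from [CAT](https://github.com/fadihamad94/CAT-NeurIPS/blob/main/scripts/solve_matrix_completion.jl).
+- You must download the data first: `test_matcom.jl` reads `test/instances/Adamstown 132_11kV FY2021.csv` (path relative to the working directory).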
diff --git a/test/ncvx/matcom.jl b/test/ncvx/matcom.jl
@@ -0,0 +1,94 @@
+using JuMP, NLPModels, NLPModelsJuMP, Random, Distributions, LinearAlgebra, Test, Optim, DataFrames, StatsBase, CSV
+
+# Seed the global RNG so that random draws made after this point (e.g. the 0/1
+# matrix drawn by `sampleData`) are repeatable from run to run.
+Random.seed!(0)
+# Solver-name labels; none of the functions defined below reference them.
+const CAT_SOLVER = "CAT"
+const NEWTON_TRUST_REGION_SOLVER = "NewtonTrustRegion"
+
+"""
+    formulateMatrixCompletionProblem(M, Ω, r, λ_1, λ_2)
+
+Build a JuMP model for a rank-`r` factorisation `P * Qᵀ` of `D = transpose(M)`.
+
+Both `M` and the integer weight matrix `Ω` are transposed on entry. With `D` of size
+`n_1 × n_2`, the decision variables are `P` (`n_1 × r`) and `Q` (`n_2 × r`), every
+entry started at `1`. The objective to minimise is
+
+    square_loss + λ_1 * (row deviation + column deviation) + λ_2 * (‖P‖² + ‖Q‖²)
+
+where `μ = mean(Ω .* D)`, the two deviation terms are the summed squared differences
+between each row/column mean of `P * Qᵀ` and `μ`, and `square_loss` is half the
+`Ω`-weighted sum of squared residuals defined in the body.
+
+Returns the `Model`. It is created with `Model()` (no optimizer argument) and is not
+solved here.
+"""
+function formulateMatrixCompletionProblem(M::Matrix, Ω::Matrix{Int64}, r::Int64, λ_1::Float64, λ_2::Float64)
+    @show "Creating model"
+    model = Model()
+
+    # Work on the transposed data; the mask is transposed to match.
+    D = transpose(M)
+    n_1 = size(D)[1]
+    n_2 = size(D)[2]
+    Ω = transpose(Ω)
+    # μ averages over *all* entries of the masked matrix, so entries with Ω == 0
+    # enter the mean as zeros.
+    temp_D = Ω .* D
+    μ = mean(temp_D)
+    @show "Creating variables"
+    # All-ones starting point for both factors.
+    A = ones(n_1, r)
+    B = ones(n_2, r)
+    @variable(model, P[i=1:n_1, j=1:r], start = A[i, j])
+    @variable(model, Q[i=1:n_2, j=1:r], start = B[i, j])
+
+    # Σ_i (mean of row i of P*Qᵀ − μ)²
+    @NLexpression(model, sum_observer_deviation_rows_squared, sum(((1 / n_2) * sum(sum(P[i, k] * transpose(Q)[k, j] for k in 1:r) for j in 1:n_2) - μ)^2 for i in 1:n_1))
+
+    # Σ_j (mean of column j of P*Qᵀ − μ)²
+    @NLexpression(model, sum_observer_deviation_columns_squared, sum(((1 / n_1) * sum(sum(P[i, k] * transpose(Q)[k, j] for k in 1:r) for i in 1:n_1) - μ)^2 for j in 1:n_2))
+
+    # Squared Frobenius norms of the two factors.
+    @NLexpression(model, frobeniusNorm_P, sum(sum(P[i, j]^2 for j in 1:r) for i in 1:n_1))
+
+    @NLexpression(model, frobeniusNorm_Q, sum(sum(Q[i, j]^2 for j in 1:r) for i in 1:n_2))
+
+    # 0.5 * Σ_ij Ω[i,j] * (D[i,j] − μ − (row-mean_i − μ) − (col-mean_j − μ) − (P*Qᵀ)[i,j])².
+    # The inner `for j` / `for i` generators of the row/column means reuse the names of
+    # the outer indices; each inner generator binds its own index.
+    @NLexpression(model, square_loss, 0.5 * (sum(sum(Ω[i, j] * (D[i, j] - μ - ((1 / n_2) * sum(sum(P[i, k] * transpose(Q)[k, j] for k in 1:r) for j in 1:n_2) - μ) - ((1 / n_1) * sum(sum(P[i, k] * transpose(Q)[k, j] for k in 1:r) for i in 1:n_1) - μ) - sum(P[i, k] * transpose(Q)[k, j] for k in 1:r))^2 for j in 1:n_2) for i in 1:n_1)))
+
+    @show "Defining objective function"
+    @NLobjective(model, Min, square_loss + λ_1 * (sum_observer_deviation_rows_squared + sum_observer_deviation_columns_squared) + λ_2 * (frobeniusNorm_P + frobeniusNorm_Q))
+    return model
+end
+
+"""
+    getData(directoryName, fileName, rows, columns)
+
+Load the data matrix with the four-argument `prepareData` and pair it with a random
+matrix of the same size drawn by `sampleData`. Returns the tuple `(M, Ω)`.
+"""
+function getData(directoryName::String, fileName::String, rows::Int64, columns::Int64)
+    data = prepareData(directoryName, fileName, rows, columns)
+    mask = sampleData(data)
+    return (data, mask)
+end
+
+"""
+    getData(directoryName, fileName, rows, columns, i, j)
+
+Load block `(i, j)` of the data with the six-argument `prepareData` and pair it with
+a random matrix of the same size drawn by `sampleData`. Returns the tuple `(M, Ω)`.
+"""
+function getData(directoryName::String, fileName::String, rows::Int64, columns::Int64, i::Int64, j::Int64)
+    block = prepareData(directoryName, fileName, rows, columns, i, j)
+    mask = sampleData(block)
+    return (block, mask)
+end
+
+"""
+    prepareData(directoryName, fileName, rows, columns)
+
+Read `fileName` from `directoryName` as CSV, overwrite every missing cell with `1.0`,
+take the window of `rows` rows starting at row 2 and `columns` columns starting at
+column 5, and return it transposed (i.e. a `columns × rows` matrix).
+
+The result is a materialised `Matrix` rather than a lazy `Transpose` wrapper, so it
+can be passed to the functions in this file whose arguments are annotated `::Matrix`.
+"""
+function prepareData(directoryName::String, fileName::String, rows::Int64, columns::Int64)
+    filePath = joinpath(directoryName, fileName)
+    df = DataFrame(CSV.File(filePath))
+    # Fill missing cells in place.
+    for col in 1:size(df, 2), row in 1:size(df, 1)
+        if ismissing(df[row, col])
+            df[row, col] = 1.0
+        end
+    end
+
+    # No missing values remain at this point; broadcasting `coalesce` keeps a
+    # DataFrame (so the slicing below works) and drops `Missing` from column eltypes.
+    df = coalesce.(df, 1.0)
+    # NOTE(review): row 1 and columns 1-4 are skipped — presumably non-data fields;
+    # confirm against the CSV layout.
+    df = df[2:(2+rows-1), 5:(5+columns-1)]
+    # `permutedims` (not `transpose`) so that callers receive a plain `Matrix`.
+    return permutedims(Matrix(df))
+end
+
+"""
+    prepareData(directoryName, fileName, rows, columns, i, j)
+
+Read `fileName` from `directoryName` as CSV, overwrite every missing cell with `1.0`,
+drop the first row and the first four columns, and return block `(i, j)` of the
+remainder as a `Matrix`: rows `1+rows*(i-1) : rows*i` and columns
+`1+columns*(j-1) : columns*j`. Unlike the four-argument method, the result is not
+transposed.
+"""
+function prepareData(directoryName::String, fileName::String, rows::Int64, columns::Int64, i::Int64, j::Int64)
+    @show "Reading file: $fileName"
+    filePath = joinpath(directoryName, fileName)
+    df = DataFrame(CSV.File(filePath))
+    @show "Replacing missing values"
+    # Loop indices are named `row`/`col` so they cannot be confused with the
+    # block selectors `i` and `j`.
+    for col in 1:size(df, 2), row in 1:size(df, 1)
+        if ismissing(df[row, col])
+            df[row, col] = 1.0
+        end
+    end
+
+    # No missing values remain at this point; broadcasting `coalesce` keeps a
+    # DataFrame (so the slicing below works) and drops `Missing` from column eltypes.
+    df = coalesce.(df, 1.0)
+    @show "Creating Matrix M"
+    # NOTE(review): row 1 and columns 1-4 are skipped — presumably non-data fields;
+    # confirm against the CSV layout.
+    df = df[2:end, 5:end]
+    df = df[1+rows*(i-1):rows*i, 1+columns*(j-1):columns*j]
+    M = Matrix(df)
+    return M
+end
+
+"""
+    sampleData(M)
+
+Return a random matrix with the same size as `M`, each entry drawn from
+`DiscreteUniform(0, 1)` using the global RNG. Only the size of `M` is read.
+
+Any `AbstractMatrix` is accepted, including the lazy wrapper returned by `transpose`.
+"""
+function sampleData(M::AbstractMatrix)
+    rows, columns = size(M)
+    return rand(DiscreteUniform(0, 1), rows, columns)
+end
diff --git a/test/ncvx/test_matcom.jl b/test/ncvx/test_matcom.jl
@@ -0,0 +1,88 @@
+using DRSOM, DataFrames, CSV
+using AdaptiveRegularization
+
+include("./matcom.jl")
+include("../tools.jl")
+
+# Run UTR, ARCqK and ST_TR on one matrix-completion instance per regularisation
+# weight, collect iteration / evaluation counters, and write them to "1.csv".
+tables = []
+for λ in [1e-2, 1e-3, 1e-4]
+    for k = 1:1
+        i = 1
+        j = 1
+        rows = 30
+        columns = 48
+        r = 9
+        λ_1 = λ_2 = λ
+        # The data path is relative to the current working directory.
+        D, Ω = getData("test/instances", "Adamstown 132_11kV FY2021.csv", rows, columns, i, j)
+        @time begin
+            model = formulateMatrixCompletionProblem(D, Ω, r, λ_1, λ_2)
+        end
+        global nlp = MathOptNLPModel(model)
+
+
+        x0 = nlp.meta.x0
+        loss(x) = NLPModels.obj(nlp, x)
+        g(x) = NLPModels.grad(nlp, x)
+        hvp(x, v, Hv) = NLPModels.hprod!(nlp, x, v, Hv)
+
+        ru = UTR(name=Symbol("Universal-TRS"))(;
+            x0=copy(x0), f=loss, g=g, hvp=hvp,
+            maxiter=300, tol=1e-5, freq=10,
+            maxtime=1500,
+            bool_trace=true,
+            subpstrategy=:lanczos,
+        )
+        reset!(nlp)
+        # Each solver keeps its own stats object so that the rows pushed below
+        # report that solver's iteration count.
+        stats_arc, _ = ARCqKOp(
+            nlp,
+            max_time=500.0,
+            max_iter=500,
+            max_eval=typemax(Int64),
+            verbose=true
+            # atol=atol,
+            # rtol=rtol,
+            # @note: how to set |g|?
+        )
+        rarc = arc_to_result(nlp, stats_arc, "ARC")
+        reset!(nlp)
+        stats_trst, _ = ST_TROp(
+            nlp,
+            max_time=500.0,
+            max_iter=500,
+            max_eval=typemax(Int64),
+            verbose=true
+            # atol=atol,
+            # rtol=rtol,
+            # @note: how to set |g|?
+        )
+        # AdaptiveRegularization.jl to my style of results
+        rtrst = arc_to_result(nlp, stats_trst, "TRST")
+
+        finalize(nlp)
+        push!(tables, [
+            λ_1,
+            "utr",
+            ru.state.k,
+            ru.state.kf,
+            ru.state.kg,
+        ])
+        push!(tables, [
+            λ_1,
+            "arc",
+            stats_arc.iter,
+            rarc.state.kf,
+            rarc.state.kg,
+        ])
+        push!(tables, [
+            λ_1,
+            "trst",
+            stats_trst.iter,
+            rtrst.state.kf,
+            rtrst.state.kg,
+        ])
+    end
+end
+# Each entry of `tables` is one row; `hcat` stacks them as columns, so flip back.
+# `permutedims` is used instead of `'` because `'` (adjoint) is applied recursively
+# to the elements, and the rows contain strings.
+df = DataFrame(permutedims(hcat(tables...)), [:λ, :name, :k, :kf, :kg])
+
+CSV.write("1.csv", df)