From 1bafba87c35bacfb3b4ded8d85feb76f9b69bd1e Mon Sep 17 00:00:00 2001 From: rjzamora Date: Wed, 17 Jul 2024 08:24:50 -0700 Subject: [PATCH] use new configs module for central config validation point --- merlin/__init__.py | 55 ----------------------------------------- merlin/core/__init__.py | 4 ++- merlin/dag/__init__.py | 6 ++++- merlin/io/__init__.py | 8 ++++-- 4 files changed, 14 insertions(+), 59 deletions(-) delete mode 100644 merlin/__init__.py diff --git a/merlin/__init__.py b/merlin/__init__.py deleted file mode 100644 index 15a4a5ee5..000000000 --- a/merlin/__init__.py +++ /dev/null @@ -1,55 +0,0 @@ -# -# Copyright (c) 2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -_DASK_QUERY_PLANNING_ENABLED = False -try: - # Disable query-planning and string conversion - import dask - - dask.config.set( - { - "dataframe.query-planning": False, - "dataframe.convert-string": False, - } - ) -except ImportError: - pass -else: - import sys - - import dask.dataframe as dd - from packaging.version import parse - - if parse(dask.__version__) > parse("2024.6.0"): - # For newer versions of dask, we can just check - # the official DASK_EXPR_ENABLED constant - _DASK_QUERY_PLANNING_ENABLED = dd.DASK_EXPR_ENABLED - else: - # For older versions of dask, we must assume query - # planning is enabled if dask_expr was imported - # (because we can't know for sure) - _DASK_QUERY_PLANNING_ENABLED = "dask_expr" in sys.modules - - -if _DASK_QUERY_PLANNING_ENABLED: - raise NotImplementedError( - "Merlin does not support the query-planning API in Dask " - "Dataframe yet. Please make sure query-planning is " - "disabled before dask.dataframe is imported.\n\n" - "e.g. dask.config.set({'dataframe.query-planning': False})" - "\n\nOr set the environment variable: " - "export DASK_DATAFRAME__QUERY_PLANNING=False" - ) diff --git a/merlin/core/__init__.py b/merlin/core/__init__.py index f35898e5d..0dda4f9d5 100644 --- a/merlin/core/__init__.py +++ b/merlin/core/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,6 +14,8 @@ # limitations under the License. # +from merlin.config import validate_dask_configs from merlin.core import _version __version__ = _version.get_versions()["version"] +validate_dask_configs() diff --git a/merlin/dag/__init__.py b/merlin/dag/__init__.py index dca0c76dd..c668e8945 100644 --- a/merlin/dag/__init__.py +++ b/merlin/dag/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2022, NVIDIA CORPORATION. 
+# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,6 +15,10 @@ # # flake8: noqa +from merlin.config import validate_dask_configs + +validate_dask_configs() + from merlin.dag.graph import Graph from merlin.dag.node import Node, iter_nodes, postorder_iter_nodes, preorder_iter_nodes from merlin.dag.operator import DataFormats, Operator, Supports diff --git a/merlin/io/__init__.py b/merlin/io/__init__.py index ff4058c5a..851f5a558 100644 --- a/merlin/io/__init__.py +++ b/merlin/io/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,8 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # - # flake8: noqa + +from merlin.config import validate_dask_configs + +validate_dask_configs() + from merlin.io import dataframe_iter, dataset, shuffle from merlin.io.dataframe_iter import DataFrameIter from merlin.io.dataset import MERLIN_METADATA_DIR_NAME, Dataset