diff --git a/pyiceberg/utils/config.py b/pyiceberg/utils/config.py index ab9b549d25..63120f382d 100644 --- a/pyiceberg/utils/config.py +++ b/pyiceberg/utils/config.py @@ -59,10 +59,8 @@ def _lowercase_dictionary_keys(input_dict: RecursiveDict) -> RecursiveDict: class Config: config: RecursiveDict - def __init__(self) -> None: - config = self._from_configuration_files() or {} - config = merge_config(config, self._from_environment_variables(config)) - self.config = FrozenDict(**config) + def __init__(self, config: RecursiveDict | None = None) -> None: + self.config = FrozenDict(**(config or {})) @staticmethod def _from_configuration_files() -> RecursiveDict | None: diff --git a/tests/catalog/test_base.py b/tests/catalog/test_base.py index 1a47478313..fe1bff583a 100644 --- a/tests/catalog/test_base.py +++ b/tests/catalog/test_base.py @@ -16,7 +16,6 @@ # under the License. # pylint:disable=redefined-outer-name - from collections.abc import Generator from pathlib import PosixPath diff --git a/tests/catalog/test_bigquery_metastore.py b/tests/catalog/test_bigquery_metastore.py index c8c7584262..278d342a95 100644 --- a/tests/catalog/test_bigquery_metastore.py +++ b/tests/catalog/test_bigquery_metastore.py @@ -14,7 +14,6 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import os from unittest.mock import MagicMock from google.api_core.exceptions import NotFound @@ -25,6 +24,7 @@ from pyiceberg.catalog.bigquery_metastore import ICEBERG_TABLE_TYPE_VALUE, TABLE_TYPE_PROP, BigQueryMetastoreCatalog from pyiceberg.exceptions import NoSuchTableError from pyiceberg.schema import Schema +from pyiceberg.utils.config import Config def dataset_mock() -> Dataset: @@ -59,7 +59,10 @@ def test_create_table_with_database_location( mocker.patch("pyiceberg.catalog.bigquery_metastore.Client", return_value=client_mock) mocker.patch("pyiceberg.catalog.bigquery_metastore.FromInputFile.table_metadata", return_value=file_mock) - mocker.patch.dict(os.environ, values={"PYICEBERG_LEGACY_CURRENT_SNAPSHOT_ID": "True"}) + mocker.patch( + "pyiceberg.catalog.bigquery_metastore.Config", + return_value=Config({"legacy-current-snapshot-id": "True"}), + ) mocker.patch("pyiceberg.catalog.ToOutputFile.table_metadata", return_value=None) catalog_name = "test_ddb_catalog" @@ -85,7 +88,10 @@ def test_drop_table_with_database_location( mocker.patch("pyiceberg.catalog.bigquery_metastore.Client", return_value=client_mock) mocker.patch("pyiceberg.catalog.bigquery_metastore.FromInputFile.table_metadata", return_value=file_mock) - mocker.patch.dict(os.environ, values={"PYICEBERG_LEGACY_CURRENT_SNAPSHOT_ID": "True"}) + mocker.patch( + "pyiceberg.catalog.bigquery_metastore.Config", + return_value=Config({"legacy-current-snapshot-id": "True"}), + ) mocker.patch("pyiceberg.catalog.ToOutputFile.table_metadata", return_value=None) catalog_name = "test_ddb_catalog" @@ -111,7 +117,10 @@ def test_drop_table_with_database_location( def test_drop_namespace(mocker: MockFixture, gcp_dataset_name: str) -> None: client_mock = MagicMock() mocker.patch("pyiceberg.catalog.bigquery_metastore.Client", return_value=client_mock) - mocker.patch.dict(os.environ, values={"PYICEBERG_LEGACY_CURRENT_SNAPSHOT_ID": "True"}) + mocker.patch( + "pyiceberg.catalog.bigquery_metastore.Config", + return_value=Config({"legacy-current-snapshot-id": "True"}), + ) catalog_name = "test_catalog" test_catalog = BigQueryMetastoreCatalog(catalog_name, **{"gcp.bigquery.project-id": "alexstephen-test-1"}) @@ -146,7 +155,10 @@ def test_list_tables(mocker: MockFixture, gcp_dataset_name: str) -> None: client_mock.get_table.return_value = table_mock() mocker.patch("pyiceberg.catalog.bigquery_metastore.Client", return_value=client_mock) - mocker.patch.dict(os.environ, values={"PYICEBERG_LEGACY_CURRENT_SNAPSHOT_ID": "True"}) + mocker.patch( + "pyiceberg.catalog.bigquery_metastore.Config", + return_value=Config({"legacy-current-snapshot-id": "True"}), + ) catalog_name = "test_catalog" test_catalog = BigQueryMetastoreCatalog(catalog_name, **{"gcp.bigquery.project-id": "my-project"}) @@ -168,7 +180,10 @@ def test_list_namespaces(mocker: MockFixture) -> None: client_mock.list_datasets.return_value = iter([dataset_item_1, dataset_item_2]) mocker.patch("pyiceberg.catalog.bigquery_metastore.Client", return_value=client_mock) - mocker.patch.dict(os.environ, values={"PYICEBERG_LEGACY_CURRENT_SNAPSHOT_ID": "True"}) + mocker.patch( + "pyiceberg.catalog.bigquery_metastore.Config", + return_value=Config({"legacy-current-snapshot-id": "True"}), + ) catalog_name = "test_catalog" test_catalog = BigQueryMetastoreCatalog(catalog_name, **{"gcp.bigquery.project-id": "my-project"}) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 99d1ef947b..d6583fd407 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -2036,18 +2036,18 @@ def test_catalog_from_environment_variables(catalog_config_mock: mock.Mock, rest @mock.patch.dict(os.environ, EXAMPLE_ENV) -@mock.patch("pyiceberg.catalog._ENV_CONFIG.get_catalog_config") -def test_catalog_from_environment_variables_override(catalog_config_mock: mock.Mock, rest_mock: Mocker) -> None: +def test_catalog_from_environment_variables_override(rest_mock: Mocker) -> None: rest_mock.get( "https://other-service.io/api/v1/config", json={"defaults": {}, "overrides": {}}, status_code=200, ) env_config: RecursiveDict = Config._from_environment_variables({}) - - catalog_config_mock.return_value = cast(RecursiveDict, env_config.get("catalog")).get("production") - catalog = cast(RestCatalog, load_catalog("production", uri="https://other-service.io/api")) - assert catalog.uri == "https://other-service.io/api" + mock_env_config = mock.Mock() + mock_env_config.get_catalog_config.return_value = cast(RecursiveDict, env_config.get("catalog")).get("production") + with mock.patch("pyiceberg.catalog._ENV_CONFIG", mock_env_config): + catalog = cast(RestCatalog, load_catalog("production", uri="https://other-service.io/api")) + assert catalog.uri == "https://other-service.io/api" def test_catalog_from_parameters_empty_env(rest_mock: Mocker) -> None: diff --git a/tests/test_serializers.py b/tests/test_serializers.py index 53ce6fcd42..eaa2d1bcbb 100644 --- a/tests/test_serializers.py +++ b/tests/test_serializers.py @@ -28,6 +28,7 @@ from pyiceberg.table.metadata import TableMetadataV1 from pyiceberg.table.update import AssertRefSnapshotId, TableRequirement from pyiceberg.typedef import IcebergBaseModel +from pyiceberg.utils.config import Config def test_legacy_current_snapshot_id( @@ -42,6 +43,15 @@ def test_legacy_current_snapshot_id( assert static_table.metadata.current_snapshot_id is None mocker.patch.dict(os.environ, values={"PYICEBERG_LEGACY_CURRENT_SNAPSHOT_ID": "True"}) + env_config = Config._from_environment_variables({}) + mocker.patch( + "pyiceberg.serializers.Config", + return_value=Config(env_config), + ) + mocker.patch( + "pyiceberg.table.metadata.Config", + return_value=Config(env_config), + ) ToOutputFile.table_metadata(metadata, PyArrowFileIO().new_output(location=metadata_location), overwrite=True) with PyArrowFileIO().new_input(location=metadata_location).open() as input_stream: diff --git a/tests/utils/test_config.py b/tests/utils/test_config.py index 5cd6a7203a..21434c9ec8 100644 --- a/tests/utils/test_config.py +++ b/tests/utils/test_config.py @@ -28,8 +28,20 @@ def test_config() -> None: - """To check if all the file lookups go well without any mocking""" + """Config() should be a pure empty container and perform no implicit IO.""" assert Config() + assert Config().config == {} + + +def test_config_does_not_load_implicitly() -> None: + with ( + mock.patch.object(Config, "_from_configuration_files") as from_files_mock, + mock.patch.object(Config, "_from_environment_variables") as from_env_mock, + ): + Config() + + from_files_mock.assert_not_called() + from_env_mock.assert_not_called() @mock.patch.dict(os.environ, EXAMPLE_ENV) @@ -183,3 +195,14 @@ def create_config_file(path: str, uri: str | None) -> None: assert ( result["catalog"]["default"]["uri"] if result else None # type: ignore ) == expected_result, f"Unexpected configuration result. Expected: {expected_result}, Actual: {result}" + + +def test_load_reads_file_and_environment_once(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("PYICEBERG_CATALOG__PRODUCTION__URI", "https://env.service.io/api") + with mock.patch.object( + Config, "_from_configuration_files", return_value={"catalog": {"production": {"type": "rest"}}} + ) as files_mock: + config = Config() + + files_mock.assert_called_once() + assert config.get_catalog_config("production") == {"type": "rest", "uri": "https://env.service.io/api"}