From b929c2e771106b9fea86cbc3855b9168ee7480fb Mon Sep 17 00:00:00 2001 From: guilhermecastro Date: Thu, 8 Aug 2024 21:40:44 -0300 Subject: [PATCH 01/14] s3_signer_endpoint --- mkdocs/docs/configuration.md | 1 + pyiceberg/io/__init__.py | 2 ++ pyiceberg/io/fsspec.py | 8 +++++- tests/io/test_fsspec.py | 53 ++++++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 1 deletion(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 971ec53961..62631a2dcf 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -84,6 +84,7 @@ For the FileIO there are several configuration options available: | s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. | | s3.signer | bearer | Configure the signature version of the FileIO. | | s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/v1/aws/s3/sign`. | +| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/. Default to v1/aws/s3/sign`. | | s3.region | us-west-2 | Sets the region of the bucket | | s3.proxy-uri | http://my.proxy.com:8080 | Configure the proxy server to be used by the FileIO. | | s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. | diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py index 0567af2d5d..56a850d799 100644 --- a/pyiceberg/io/__init__.py +++ b/pyiceberg/io/__init__.py @@ -58,6 +58,8 @@ S3_PROXY_URI = "s3.proxy-uri" S3_CONNECT_TIMEOUT = "s3.connect-timeout" S3_SIGNER_URI = "s3.signer.uri" +S3_SIGNER_ENDPOINT = "s3.signer.endpoint" +S3_SIGNER_ENDPOINT_DEFAULT_VALUE = "v1/aws/s3/sign" HDFS_HOST = "hdfs.host" HDFS_PORT = "hdfs.port" HDFS_USER = "hdfs.user" diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index d6e4a32add..36b69d5299 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -67,6 +67,8 @@ S3_REGION, S3_SECRET_ACCESS_KEY, S3_SESSION_TOKEN, + S3_SIGNER_ENDPOINT, + S3_SIGNER_ENDPOINT_DEFAULT_VALUE, S3_SIGNER_URI, ADLFS_ClIENT_SECRET, FileIO, @@ -86,6 +88,10 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> A raise SignError("Signer set, but token is not available") signer_url = properties.get(S3_SIGNER_URI, properties["uri"]).rstrip("/") + signer_endpoint = properties.get(S3_SIGNER_ENDPOINT, None) + if signer_endpoint is None: + signer_endpoint = properties.get("endpoint", S3_SIGNER_ENDPOINT_DEFAULT_VALUE) + signer_headers = {"Authorization": f"Bearer {properties[TOKEN]}"} signer_body = { "method": request.method, @@ -94,7 +100,7 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> A "headers": {key: [val] for key, val in request.headers.items()}, } - response = requests.post(f"{signer_url}/v1/aws/s3/sign", headers=signer_headers, json=signer_body) + response = requests.post(f"{signer_url}/{signer_endpoint.lstrip()}", headers=signer_headers, json=signer_body) try: response.raise_for_status() response_json = response.json() diff --git a/tests/io/test_fsspec.py b/tests/io/test_fsspec.py index 3dd79e182b..1f21e44982 100644 --- a/tests/io/test_fsspec.py +++ b/tests/io/test_fsspec.py @@ -727,6 +727,59 @@ def test_s3v4_rest_signer(requests_mock: Mocker) -> None: } +def test_s3v4_rest_signer_endpoint(requests_mock: Mocker) -> None: + new_uri = "https://other-bucket/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro" + endpoint = "v1/main/s3-sign/foo.bar?e=e&b=b&k=k=k&s=s&w=w" + requests_mock.post( + f"{TEST_URI}/{endpoint}", + json={ + "uri": new_uri, + "headers": { + "Authorization": [ + "AWS4-HMAC-SHA256 Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02" + ], + "Host": ["bucket.s3.us-west-2.amazonaws.com"], + "User-Agent": ["Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0"], + "x-amz-content-sha256": ["UNSIGNED-PAYLOAD"], + "X-Amz-Date": ["20221017T102940Z"], + "X-Amz-Security-Token": [ + "YQoJb3JpZ2luX2VjEDoaCXVzLXdlc3QtMiJGMEQCID/fFxZP5oaEgQmcwP6XhZa0xSq9lmLSx8ffaWbySfUPAiAesa7sjd/WV4uwRTO0S03y/MWVtgpH+/NyZQ4bZgLVriqrAggTEAEaDDAzMzQwNzIyMjE1OSIMOeFOWhZIurMmAqjsKogCxMCqxX8ZjK0gacAkcDqBCyA7qTSLhdfKQIH/w7WpLBU1km+cRUWWCudan6gZsAq867DBaKEP7qI05DAWr9MChAkgUgyI8/G3Z23ET0gAedf3GsJbakB0F1kklx8jPmj4BPCht9RcTiXiJ5DxTS/cRCcalIQXmPFbaJSqpBusVG2EkWnm1v7VQrNPE2Os2b2P293vpbhwkyCEQiGRVva4Sw9D1sKvqSsK10QCRG+os6dFEOu1kARaXi6pStvR4OVmj7OYeAYjzaFchn7nz2CSae0M4IluiYQ01eQAywbfRo9DpKSmDM/DnPZWJnD/woLhaaaCrCxSSEaFsvGOHFhLd3Rknw1v0jADMILUtJoGOp4BpqKqyMz0CY3kpKL0jfR3ykTf/ge9wWVE0Alr7wRIkGCIURkhslGHqSyFRGoTqIXaxU+oPbwlw/0w/nYO7qQ6bTANOWye/wgw4h/NmJ6vU7wnZTXwREf1r6MF72++bE/fMk19LfVb8jN/qrUqAUXTc8gBAUxL5pgy8+oT/JnI2BkVrrLS4ilxEXP9Ahm+6GDUYXV4fBpqpZwdkzQ/5Gw=" + ], + }, + "extensions": {}, + }, + status_code=200, + ) + + request = AWSRequest( + method="HEAD", + url="https://bucket/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro", + headers={"User-Agent": "Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0"}, + data=b"", + params={}, + auth_path="/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro", + ) + request.context = { + "client_region": "us-west-2", + "has_streaming_input": False, + "auth_type": None, + "signing": {"bucket": "bucket"}, + "retries": {"attempt": 1, "invocation-id": "75d143fb-0219-439b-872c-18213d1c8d54"}, + } + + signed_request = s3v4_rest_signer({"token": "abc", "uri": TEST_URI, "endpoint": endpoint}, request) + + assert signed_request.url == new_uri + assert dict(signed_request.headers) == { + "Authorization": "AWS4-HMAC-SHA256 Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02", + "Host": "bucket.s3.us-west-2.amazonaws.com", + "User-Agent": "Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0", + "X-Amz-Date": "20221017T102940Z", + "X-Amz-Security-Token": "YQoJb3JpZ2luX2VjEDoaCXVzLXdlc3QtMiJGMEQCID/fFxZP5oaEgQmcwP6XhZa0xSq9lmLSx8ffaWbySfUPAiAesa7sjd/WV4uwRTO0S03y/MWVtgpH+/NyZQ4bZgLVriqrAggTEAEaDDAzMzQwNzIyMjE1OSIMOeFOWhZIurMmAqjsKogCxMCqxX8ZjK0gacAkcDqBCyA7qTSLhdfKQIH/w7WpLBU1km+cRUWWCudan6gZsAq867DBaKEP7qI05DAWr9MChAkgUgyI8/G3Z23ET0gAedf3GsJbakB0F1kklx8jPmj4BPCht9RcTiXiJ5DxTS/cRCcalIQXmPFbaJSqpBusVG2EkWnm1v7VQrNPE2Os2b2P293vpbhwkyCEQiGRVva4Sw9D1sKvqSsK10QCRG+os6dFEOu1kARaXi6pStvR4OVmj7OYeAYjzaFchn7nz2CSae0M4IluiYQ01eQAywbfRo9DpKSmDM/DnPZWJnD/woLhaaaCrCxSSEaFsvGOHFhLd3Rknw1v0jADMILUtJoGOp4BpqKqyMz0CY3kpKL0jfR3ykTf/ge9wWVE0Alr7wRIkGCIURkhslGHqSyFRGoTqIXaxU+oPbwlw/0w/nYO7qQ6bTANOWye/wgw4h/NmJ6vU7wnZTXwREf1r6MF72++bE/fMk19LfVb8jN/qrUqAUXTc8gBAUxL5pgy8+oT/JnI2BkVrrLS4ilxEXP9Ahm+6GDUYXV4fBpqpZwdkzQ/5Gw=", + "x-amz-content-sha256": "UNSIGNED-PAYLOAD", + } + + def test_s3v4_rest_signer_forbidden(requests_mock: Mocker) -> None: requests_mock.post( f"{TEST_URI}/v1/aws/s3/sign", From e1d9f645af28914b982cc1f4aa41ab02363ed002 Mon Sep 17 00:00:00 2001 From: Guilherme Torres Castro <1149991+guitcastro@users.noreply.github.com> Date: Fri, 9 Aug 2024 10:43:30 -0300 Subject: [PATCH 02/14] prune any trailing whitespaces Co-authored-by: Fokko Driesprong --- pyiceberg/io/fsspec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index 36b69d5299..1c14d640fe 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -100,7 +100,7 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> A "headers": {key: [val] for key, val in request.headers.items()}, } - response = requests.post(f"{signer_url}/{signer_endpoint.lstrip()}", headers=signer_headers, json=signer_body) + response = requests.post(f"{signer_url}/{signer_endpoint.strip()}", headers=signer_headers, json=signer_body) try: response.raise_for_status() response_json = response.json() From f3cfd370878eeb4731e1a8a32276a401f832a9f6 Mon Sep 17 00:00:00 2001 From: Guilherme Torres Castro <1149991+guitcastro@users.noreply.github.com> Date: Fri, 9 Aug 2024 10:44:00 -0300 Subject: [PATCH 03/14] fallback to default value instead of "endpoint" property Co-authored-by: Fokko Driesprong --- pyiceberg/io/fsspec.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index 1c14d640fe..7e242a99ad 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -88,9 +88,7 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> A raise SignError("Signer set, but token is not available") signer_url = properties.get(S3_SIGNER_URI, properties["uri"]).rstrip("/") - signer_endpoint = properties.get(S3_SIGNER_ENDPOINT, None) - if signer_endpoint is None: - signer_endpoint = properties.get("endpoint", S3_SIGNER_ENDPOINT_DEFAULT_VALUE) + signer_endpoint = properties.get(S3_SIGNER_ENDPOINT, S3_SIGNER_ENDPOINT_DEFAULT_VALUE) signer_headers = {"Authorization": f"Bearer {properties[TOKEN]}"} signer_body = { From 989bd14ff9a5eace7b46224878d8b2adc759db0c Mon Sep 17 00:00:00 2001 From: guilhermecastro Date: Fri, 9 Aug 2024 10:47:28 -0300 Subject: [PATCH 04/14] fix test_s3v4_rest_signer_endpoint --- tests/io/test_fsspec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/io/test_fsspec.py b/tests/io/test_fsspec.py index 1f21e44982..fbb184910a 100644 --- a/tests/io/test_fsspec.py +++ b/tests/io/test_fsspec.py @@ -767,7 +767,7 @@ def test_s3v4_rest_signer_endpoint(requests_mock: Mocker) -> None: "retries": {"attempt": 1, "invocation-id": "75d143fb-0219-439b-872c-18213d1c8d54"}, } - signed_request = s3v4_rest_signer({"token": "abc", "uri": TEST_URI, "endpoint": endpoint}, request) + signed_request = s3v4_rest_signer({"token": "abc", "uri": TEST_URI, "s3.signer.endpoint": endpoint}, request) assert signed_request.url == new_uri assert dict(signed_request.headers) == { From 2045f8361625a72de5b10ebf1d83f0f3909137a3 Mon Sep 17 00:00:00 2001 From: Guilherme Torres Castro <1149991+guitcastro@users.noreply.github.com> Date: Fri, 9 Aug 2024 10:57:16 -0300 Subject: [PATCH 05/14] Fix missing backtick Co-authored-by: Fokko Driesprong --- mkdocs/docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 62631a2dcf..7722b369a3 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -84,7 +84,7 @@ For the FileIO there are several configuration options available: | s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. | | s3.signer | bearer | Configure the signature version of the FileIO. | | s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/v1/aws/s3/sign`. | -| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/. Default to v1/aws/s3/sign`. | +| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. Default to v1/aws/s3/sign`. | | s3.region | us-west-2 | Sets the region of the bucket | | s3.proxy-uri | http://my.proxy.com:8080 | Configure the proxy server to be used by the FileIO. | | s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. | From cf5485603e4ccfad182fc08db5af125659eb457b Mon Sep 17 00:00:00 2001 From: guilhermecastro Date: Fri, 9 Aug 2024 12:14:19 -0300 Subject: [PATCH 06/14] create access_delegation property --- mkdocs/docs/configuration.md | 29 +++++++++++++++-------------- pyiceberg/catalog/rest.py | 4 +++- tests/catalog/test_rest.py | 25 +++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 7722b369a3..f6b8f569a3 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -84,7 +84,7 @@ For the FileIO there are several configuration options available: | s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. | | s3.signer | bearer | Configure the signature version of the FileIO. | | s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/v1/aws/s3/sign`. | -| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. Default to v1/aws/s3/sign`. | +| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. Default to v1/aws/s3/sign\`. | | s3.region | us-west-2 | Sets the region of the bucket | | s3.proxy-uri | http://my.proxy.com:8080 | Configure the proxy server to be used by the FileIO. | | s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. | @@ -199,19 +199,20 @@ catalog: -| Key | Example | Description | -| ------------------- | -------------------------------- | -------------------------------------------------------------------------------------------------- | -| uri | https://rest-catalog/ws | URI identifying the REST Server | -| ugi | t-1234:secret | Hadoop UGI for Hive client. | -| credential | t-1234:secret | Credential to use for OAuth2 credential flow when initializing the catalog | -| token | FEW23.DFSDF.FSDF | Bearer token value to use for `Authorization` header | -| scope | openid offline corpds:ds:profile | Desired scope of the requested security token (default : catalog) | -| resource | rest_catalog.iceberg.com | URI for the target resource or service | -| audience | rest_catalog | Logical name of target resource or service | -| rest.sigv4-enabled | true | Sign requests to the REST Server using AWS SigV4 protocol | -| rest.signing-region | us-east-1 | The region to use when SigV4 signing a request | -| rest.signing-name | execute-api | The service signing name to use when SigV4 signing a request | -| oauth2-server-uri | https://auth-service/cc | Authentication URL to use for client credentials authentication (default: uri + 'v1/oauth/tokens') | +| Key | Example | Description | +| ------------------- | -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| uri | https://rest-catalog/ws | URI identifying the REST Server | +| ugi | t-1234:secret | Hadoop UGI for Hive client. | +| credential | t-1234:secret | Credential to use for OAuth2 credential flow when initializing the catalog | +| token | FEW23.DFSDF.FSDF | Bearer token value to use for `Authorization` header | +| scope | openid offline corpds:ds:profile | Desired scope of the requested security token (default : catalog) | +| resource | rest_catalog.iceberg.com | URI for the target resource or service | +| audience | rest_catalog | Logical name of target resource or service | +| access_delegation | remote-signing | A comma-separated list of access mechanisms to signal the server that the client supports delegated access. It will to be sended in `X-Iceberg-Access-Delegation` header. (default: vended-credentials) | +| rest.sigv4-enabled | true | Sign requests to the REST Server using AWS SigV4 protocol | +| rest.signing-region | us-east-1 | The region to use when SigV4 signing a request | +| rest.signing-name | execute-api | The service signing name to use when SigV4 signing a request | +| oauth2-server-uri | https://auth-service/cc | Authentication URL to use for client credentials authentication (default: uri + 'v1/oauth/tokens') | diff --git a/pyiceberg/catalog/rest.py b/pyiceberg/catalog/rest.py index c22b614d6e..d3cd5dc884 100644 --- a/pyiceberg/catalog/rest.py +++ b/pyiceberg/catalog/rest.py @@ -98,6 +98,8 @@ class Endpoints: rename_table: str = "tables/rename" +ACCESS_DELEGATION = "access_delegation" +ACCESS_DELEGATION_DEFAULT_VALUE = "vended-credentials" AUTHORIZATION_HEADER = "Authorization" BEARER_PREFIX = "Bearer" CATALOG_SCOPE = "catalog" @@ -532,7 +534,7 @@ def _config_headers(self, session: Session) -> None: session.headers["Content-type"] = "application/json" session.headers["X-Client-Version"] = ICEBERG_REST_SPEC_VERSION session.headers["User-Agent"] = f"PyIceberg/{__version__}" - session.headers["X-Iceberg-Access-Delegation"] = "vended-credentials" + session.headers["X-Iceberg-Access-Delegation"] = self.properties.get(ACCESS_DELEGATION, ACCESS_DELEGATION_DEFAULT_VALUE) def _extract_headers_from_properties(self) -> Dict[str, str]: return {key[len(HEADER_PREFIX) :]: value for key, value in self.properties.items() if key.startswith(HEADER_PREFIX)} diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 54239ce3f4..fb6db1c8b3 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -57,6 +57,7 @@ "X-Client-Version": "0.14.1", "User-Agent": f"PyIceberg/{pyiceberg.__version__}", "Authorization": f"Bearer {TEST_TOKEN}", + "X-Iceberg-Access-Delegation": "vended-credentials", } OAUTH_TEST_HEADERS = { "Content-type": "application/x-www-form-urlencoded", @@ -659,6 +660,30 @@ def test_load_table_200(rest_mock: Mocker, example_table_metadata_with_snapshot_ assert actual == expected +def test_load_table_honor_access_delegation( + rest_mock: Mocker, example_table_metadata_with_snapshot_v1_rest_json: Dict[str, Any] +) -> None: + test_headers_with_remote_signing = {**TEST_HEADERS, "X-Iceberg-Access-Delegation": "remote-signing"} + rest_mock.get( + f"{TEST_URI}v1/namespaces/fokko/tables/table", + json=example_table_metadata_with_snapshot_v1_rest_json, + status_code=200, + request_headers=test_headers_with_remote_signing, + ) + catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, access_delegation="remote-signing") + actual = catalog.load_table(("fokko", "table")) + expected = Table( + identifier=("fokko", "table"), + metadata_location=example_table_metadata_with_snapshot_v1_rest_json["metadata-location"], + metadata=TableMetadataV1(**example_table_metadata_with_snapshot_v1_rest_json["metadata"]), + io=load_file_io(), + catalog=catalog, + ) + # First compare the dicts + assert actual.metadata.model_dump() == expected.metadata.model_dump() + assert actual == expected + + def test_load_table_from_self_identifier_200( rest_mock: Mocker, example_table_metadata_with_snapshot_v1_rest_json: Dict[str, Any] ) -> None: From a197d28d9f299b638b45b88deaf7b10c4f11e95b Mon Sep 17 00:00:00 2001 From: guilhermecastro Date: Fri, 9 Aug 2024 16:33:28 -0300 Subject: [PATCH 07/14] rename S3_SIGNER_ENDPOINT_DEFAULT_VALUE to S3_SIGNER_ENDPOINT_DEFAULT --- pyiceberg/io/__init__.py | 2 +- pyiceberg/io/fsspec.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py index 56a850d799..d5f26a1780 100644 --- a/pyiceberg/io/__init__.py +++ b/pyiceberg/io/__init__.py @@ -59,7 +59,7 @@ S3_CONNECT_TIMEOUT = "s3.connect-timeout" S3_SIGNER_URI = "s3.signer.uri" S3_SIGNER_ENDPOINT = "s3.signer.endpoint" -S3_SIGNER_ENDPOINT_DEFAULT_VALUE = "v1/aws/s3/sign" +S3_SIGNER_ENDPOINT_DEFAULT = "v1/aws/s3/sign" HDFS_HOST = "hdfs.host" HDFS_PORT = "hdfs.port" HDFS_USER = "hdfs.user" diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index 7e242a99ad..a5bf2381cb 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -68,7 +68,7 @@ S3_SECRET_ACCESS_KEY, S3_SESSION_TOKEN, S3_SIGNER_ENDPOINT, - S3_SIGNER_ENDPOINT_DEFAULT_VALUE, + S3_SIGNER_ENDPOINT_DEFAULT, S3_SIGNER_URI, ADLFS_ClIENT_SECRET, FileIO, @@ -88,7 +88,7 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> A raise SignError("Signer set, but token is not available") signer_url = properties.get(S3_SIGNER_URI, properties["uri"]).rstrip("/") - signer_endpoint = properties.get(S3_SIGNER_ENDPOINT, S3_SIGNER_ENDPOINT_DEFAULT_VALUE) + signer_endpoint = properties.get(S3_SIGNER_ENDPOINT, S3_SIGNER_ENDPOINT_DEFAULT) signer_headers = {"Authorization": f"Bearer {properties[TOKEN]}"} signer_body = { From a425b69b3d9b84acc760e6045677544d4e1e7772 Mon Sep 17 00:00:00 2001 From: guilhermecastro Date: Fri, 9 Aug 2024 16:33:41 -0300 Subject: [PATCH 08/14] fix s3.signer.endpoint docs --- mkdocs/docs/configuration.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 7722b369a3..eec7b330ed 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -77,14 +77,14 @@ For the FileIO there are several configuration options available: | Key | Example | Description | -| -------------------- | ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| -------------------- | ------------------------ |-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | s3.endpoint | https://10.0.19.25/ | Configure an alternative endpoint of the S3 service for the FileIO to access. This could be used to use S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. | | s3.access-key-id | admin | Configure the static access key id used to access the FileIO. | | s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. | | s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. | | s3.signer | bearer | Configure the signature version of the FileIO. | -| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/v1/aws/s3/sign`. | -| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. Default to v1/aws/s3/sign`. | +| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. | +| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. (default : v1/aws/s3/sign). | | s3.region | us-west-2 | Sets the region of the bucket | | s3.proxy-uri | http://my.proxy.com:8080 | Configure the proxy server to be used by the FileIO. | | s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. | From 10cc361c2844ef6ef6049b9bf482dbec90289d68 Mon Sep 17 00:00:00 2001 From: guilhermecastro Date: Fri, 9 Aug 2024 17:20:41 -0300 Subject: [PATCH 09/14] fk typo in signer --- mkdocs/docs/configuration.md | 22 +++++++++++----------- pyiceberg/io/fsspec.py | 6 +++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index eec7b330ed..9ebdde762a 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -76,18 +76,18 @@ For the FileIO there are several configuration options available: -| Key | Example | Description | -| -------------------- | ------------------------ |-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Key | Example | Description | +| -------------------- | ------------------------ |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | s3.endpoint | https://10.0.19.25/ | Configure an alternative endpoint of the S3 service for the FileIO to access. This could be used to use S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. | -| s3.access-key-id | admin | Configure the static access key id used to access the FileIO. | -| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. | -| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. | -| s3.signer | bearer | Configure the signature version of the FileIO. | -| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. | -| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. (default : v1/aws/s3/sign). | -| s3.region | us-west-2 | Sets the region of the bucket | -| s3.proxy-uri | http://my.proxy.com:8080 | Configure the proxy server to be used by the FileIO. | -| s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. | +| s3.access-key-id | admin | Configure the static access key id used to access the FileIO. | +| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. | +| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. | +| s3.signer | bearer | Configure the signature version of the FileIO. | +| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. | +| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. (default : v1/aws/s3/sign). | +| s3.region | us-west-2 | Sets the region of the bucket | +| s3.proxy-uri | http://my.proxy.com:8080 | Configure the proxy server to be used by the FileIO. | +| s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. | diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index a5bf2381cb..cbe5d5b64b 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -135,9 +135,9 @@ def _s3(properties: Properties) -> AbstractFileSystem: if signer := properties.get("s3.signer"): logger.info("Loading signer %s", signer) - if singer_func := SIGNERS.get(signer): - singer_func_with_properties = partial(singer_func, properties) - register_events["before-sign.s3"] = singer_func_with_properties + if signer_func := SIGNERS.get(signer): + signer_func_with_properties = partial(signer_func, properties) + register_events["before-sign.s3"] = signer_func_with_properties # Disable the AWS Signer config_kwargs["signature_version"] = UNSIGNED From 07325e595af1d9de06a784da61d5f5ea03220dc7 Mon Sep 17 00:00:00 2001 From: guilhermecastro Date: Mon, 12 Aug 2024 10:01:25 -0300 Subject: [PATCH 10/14] fix fmt --- mkdocs/docs/configuration.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 9ebdde762a..5f67b34ffe 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -76,18 +76,18 @@ For the FileIO there are several configuration options available: -| Key | Example | Description | -| -------------------- | ------------------------ |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Key | Example | Description | +| -------------------- | ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | s3.endpoint | https://10.0.19.25/ | Configure an alternative endpoint of the S3 service for the FileIO to access. This could be used to use S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. | -| s3.access-key-id | admin | Configure the static access key id used to access the FileIO. | -| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. | -| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. | -| s3.signer | bearer | Configure the signature version of the FileIO. | -| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. | +| s3.access-key-id | admin | Configure the static access key id used to access the FileIO. | +| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. | +| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. | +| s3.signer | bearer | Configure the signature version of the FileIO. | +| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. | | s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. (default : v1/aws/s3/sign). | -| s3.region | us-west-2 | Sets the region of the bucket | -| s3.proxy-uri | http://my.proxy.com:8080 | Configure the proxy server to be used by the FileIO. | -| s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. | +| s3.region | us-west-2 | Sets the region of the bucket | +| s3.proxy-uri | http://my.proxy.com:8080 | Configure the proxy server to be used by the FileIO. | +| s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. | From da3490b6456c83241a1fde1f2a7df9311d6b9bff Mon Sep 17 00:00:00 2001 From: guilhermecastro Date: Mon, 12 Aug 2024 15:31:11 -0300 Subject: [PATCH 11/14] rename ACCESS_DELEGATION_DEFAULT_VALUE to ACCESS_DELEGATION_DEFAULT --- pyiceberg/catalog/rest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiceberg/catalog/rest.py b/pyiceberg/catalog/rest.py index d3cd5dc884..1c4caff8f3 100644 --- a/pyiceberg/catalog/rest.py +++ b/pyiceberg/catalog/rest.py @@ -99,7 +99,7 @@ class Endpoints: ACCESS_DELEGATION = "access_delegation" -ACCESS_DELEGATION_DEFAULT_VALUE = "vended-credentials" +ACCESS_DELEGATION_DEFAULT = "vended-credentials" AUTHORIZATION_HEADER = "Authorization" BEARER_PREFIX = "Bearer" CATALOG_SCOPE = "catalog" @@ -534,7 +534,7 @@ def _config_headers(self, session: Session) -> None: session.headers["Content-type"] = "application/json" session.headers["X-Client-Version"] = ICEBERG_REST_SPEC_VERSION session.headers["User-Agent"] = f"PyIceberg/{__version__}" - session.headers["X-Iceberg-Access-Delegation"] = self.properties.get(ACCESS_DELEGATION, ACCESS_DELEGATION_DEFAULT_VALUE) + session.headers["X-Iceberg-Access-Delegation"] = self.properties.get(ACCESS_DELEGATION, ACCESS_DELEGATION_DEFAULT) def _extract_headers_from_properties(self) -> Dict[str, str]: return {key[len(HEADER_PREFIX) :]: value for key, value in self.properties.items() if key.startswith(HEADER_PREFIX)} From 238df700ea60ff8d8a993dfca82d57cc930cfe4f Mon Sep 17 00:00:00 2001 From: guilhermecastro Date: Fri, 16 Aug 2024 11:02:50 -0300 Subject: [PATCH 12/14] rename access_delegation to access-delegation --- mkdocs/docs/configuration.md | 2 +- pyiceberg/catalog/rest.py | 2 +- tests/catalog/test_rest.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index ac2ceb071d..bed5f528f2 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -207,7 +207,7 @@ catalog: | scope | openid offline corpds:ds:profile | Desired scope of the requested security token (default : catalog) | | resource | rest_catalog.iceberg.com | URI for the target resource or service | | audience | rest_catalog | Logical name of target resource or service | -| access_delegation | remote-signing | A comma-separated list of access mechanisms to signal the server that the client supports delegated access. It will to be sended in `X-Iceberg-Access-Delegation` header. (default: vended-credentials) | +| access-delegation | remote-signing | A comma-separated list of access mechanisms to signal the server that the client supports delegated access. It will to be sended in `X-Iceberg-Access-Delegation` header. (default: vended-credentials) | | rest.sigv4-enabled | true | Sign requests to the REST Server using AWS SigV4 protocol | | rest.signing-region | us-east-1 | The region to use when SigV4 signing a request | | rest.signing-name | execute-api | The service signing name to use when SigV4 signing a request | diff --git a/pyiceberg/catalog/rest.py b/pyiceberg/catalog/rest.py index 1c4caff8f3..c3a2e4118b 100644 --- a/pyiceberg/catalog/rest.py +++ b/pyiceberg/catalog/rest.py @@ -98,7 +98,7 @@ class Endpoints: rename_table: str = "tables/rename" -ACCESS_DELEGATION = "access_delegation" +ACCESS_DELEGATION = "access-delegation" ACCESS_DELEGATION_DEFAULT = "vended-credentials" AUTHORIZATION_HEADER = "Authorization" BEARER_PREFIX = "Bearer" diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index fb6db1c8b3..9d48680813 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -670,7 +670,7 @@ def test_load_table_honor_access_delegation( status_code=200, request_headers=test_headers_with_remote_signing, ) - catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, access_delegation="remote-signing") + catalog = RestCatalog("rest", **{"uri": TEST_URI, "token": TEST_TOKEN, "access-delegation": "remote-signing"}) actual = catalog.load_table(("fokko", "table")) expected = Table( identifier=("fokko", "table"), From ae4a677e2203c310aff959b206b45e3109022847 Mon Sep 17 00:00:00 2001 From: Guilherme Torres Castro <1149991+guitcastro@users.noreply.github.com> Date: Tue, 20 Aug 2024 12:54:03 -0300 Subject: [PATCH 13/14] fix grammar Co-authored-by: Sung Yun <107272191+sungwy@users.noreply.github.com> --- mkdocs/docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index bed5f528f2..aaa4239323 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -207,7 +207,7 @@ catalog: | scope | openid offline corpds:ds:profile | Desired scope of the requested security token (default : catalog) | | resource | rest_catalog.iceberg.com | URI for the target resource or service | | audience | rest_catalog | Logical name of target resource or service | -| access-delegation | remote-signing | A comma-separated list of access mechanisms to signal the server that the client supports delegated access. It will to be sended in `X-Iceberg-Access-Delegation` header. (default: vended-credentials) | +| access-delegation | remote-signing | A comma-separated list of access mechanisms to signal the server that the client supports delegated access. It will be sent in `X-Iceberg-Access-Delegation` header. (default: vended-credentials) | | rest.sigv4-enabled | true | Sign requests to the REST Server using AWS SigV4 protocol | | rest.signing-region | us-east-1 | The region to use when SigV4 signing a request | | rest.signing-name | execute-api | The service signing name to use when SigV4 signing a request | From 0c4feec7d4daee946d45da75c0d9c0ccb55c5ef2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez-Mondrag=C3=B3n?= Date: Sat, 5 Oct 2024 00:24:14 -0600 Subject: [PATCH 14/14] Suggestions for #1033 --- pyiceberg/catalog/rest.py | 3 +-- tests/catalog/test_rest.py | 10 +++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pyiceberg/catalog/rest.py b/pyiceberg/catalog/rest.py index c2d6d417c9..1cc6bd27c0 100644 --- a/pyiceberg/catalog/rest.py +++ b/pyiceberg/catalog/rest.py @@ -112,7 +112,6 @@ class IdentifierKind(Enum): VIEW = "view" -ACCESS_DELEGATION = "access-delegation" ACCESS_DELEGATION_DEFAULT = "vended-credentials" AUTHORIZATION_HEADER = "Authorization" BEARER_PREFIX = "Bearer" @@ -557,7 +556,7 @@ def _config_headers(self, session: Session) -> None: session.headers["Content-type"] = "application/json" session.headers["X-Client-Version"] = ICEBERG_REST_SPEC_VERSION session.headers["User-Agent"] = f"PyIceberg/{__version__}" - session.headers["X-Iceberg-Access-Delegation"] = self.properties.get(ACCESS_DELEGATION, ACCESS_DELEGATION_DEFAULT) + session.headers.setdefault("X-Iceberg-Access-Delegation", ACCESS_DELEGATION_DEFAULT) def _extract_headers_from_properties(self) -> Dict[str, str]: return {key[len(HEADER_PREFIX) :]: value for key, value in self.properties.items() if key.startswith(HEADER_PREFIX)} diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 11252712b8..9d75154ae0 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -719,7 +719,15 @@ def test_load_table_honor_access_delegation( status_code=200, request_headers=test_headers_with_remote_signing, ) - catalog = RestCatalog("rest", **{"uri": TEST_URI, "token": TEST_TOKEN, "access-delegation": "remote-signing"}) + # catalog = RestCatalog("rest", **{"uri": TEST_URI, "token": TEST_TOKEN, "access-delegation": "remote-signing"}) + catalog = RestCatalog( + "rest", + **{ + "uri": TEST_URI, + "token": TEST_TOKEN, + "header.X-Iceberg-Access-Delegation": "remote-signing", + }, + ) actual = catalog.load_table(("fokko", "table")) expected = Table( identifier=("fokko", "table"),