From 2d62aea0f89fbf9121704f3b93d2cf46189def01 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 29 Jul 2020 12:18:17 +0100 Subject: [PATCH 01/11] Add internal URLMatcher class --- httpx/_utils.py | 40 ++++++++++++++++++++++++++++++++++++++++ tests/test_utils.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/httpx/_utils.py b/httpx/_utils.py index 55503655d3..9790858647 100644 --- a/httpx/_utils.py +++ b/httpx/_utils.py @@ -425,5 +425,45 @@ def elapsed(self) -> timedelta: return timedelta(seconds=self.end - self.start) +class URLMatcher: + def __init__(self, pattern: str) -> None: + from ._models import URL + + if pattern and ":" not in pattern: + pattern += "://" + + url = URL(pattern) + self.pattern = pattern + self.scheme = "" if url.scheme == "all" else url.scheme + self.host = url.host + self.port = url.port + + def matches(self, other: "URL") -> bool: + if self.scheme and self.scheme != other.scheme: + return False + if self.host and self.host != other.host: + return False + if self.port is not None and self.port != other.port: + return False + return True + + @property + def priority(self) -> tuple: + """ + The priority allows URLMatcher instances to be sortable, so that + if we can match from most specific to least specific. 
+ """ + port_priority = -1 if self.port is not None else 0 + host_priority = -len(self.host) + scheme_priority = -len(self.scheme) + return (port_priority, host_priority, scheme_priority) + + def __lt__(self, other: "URLMatcher") -> bool: + return self.priority < other.priority + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, URLMatcher) and self.pattern == other.pattern + + def warn_deprecated(message: str) -> None: # pragma: nocover warnings.warn(message, DeprecationWarning, stacklevel=2) diff --git a/tests/test_utils.py b/tests/test_utils.py index fa30ee87cd..88fb1000bb 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,5 +1,6 @@ import asyncio import os +import random import pytest @@ -7,6 +8,7 @@ from httpx._utils import ( ElapsedTimer, NetRCInfo, + URLMatcher, get_ca_bundle_from_env, get_environment_proxies, guess_json_utf, @@ -307,3 +309,43 @@ def test_not_same_origin(): origin1 = httpx.URL("https://example.com") origin2 = httpx.URL("HTTP://EXAMPLE.COM") assert not same_origin(origin1, origin2) + + +@pytest.mark.parametrize( + ["pattern", "url", "expected"], + [ + ("http://example.com", "http://example.com", True,), + ("http://example.com", "https://example.com", False,), + ("http://example.com", "http://other.com", False,), + ("http://example.com:123", "http://example.com:123", True,), + ("http://example.com:123", "http://example.com:456", False,), + ("http://example.com:123", "http://example.com", False,), + ("all://example.com", "http://example.com", True,), + ("all://example.com", "https://example.com", True,), + ("http://", "http://example.com", True,), + ("http://", "https://example.com", False,), + ("http", "http://example.com", True,), + ("http", "https://example.com", False,), + ("all", "https://example.com:123", True,), + ("", "https://example.com:123", True,), + ], +) +def test_url_matches(pattern, url, expected): + matcher = URLMatcher(pattern) + assert matcher.matches(httpx.URL(url)) == expected + + +def 
test_matcher_priority(): + matchers = [ + URLMatcher("all://"), + URLMatcher("http://"), + URLMatcher("http://example.com"), + URLMatcher("http://example.com:123"), + ] + random.shuffle(matchers) + assert sorted(matchers) == [ + URLMatcher("http://example.com:123"), + URLMatcher("http://example.com"), + URLMatcher("http://"), + URLMatcher("all://"), + ] From e1a7d45bcde5bc662d4713e0566e582caadbe63f Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 29 Jul 2020 12:40:05 +0100 Subject: [PATCH 02/11] Use URLMatcher for proxy lookups in transport_for_url --- httpx/_client.py | 45 +++++++++--------------------------- httpx/_utils.py | 3 +++ tests/client/test_proxies.py | 14 ++++++----- 3 files changed, 22 insertions(+), 40 deletions(-) diff --git a/httpx/_client.py b/httpx/_client.py index 518531986d..dca8718fcf 100644 --- a/httpx/_client.py +++ b/httpx/_client.py @@ -44,6 +44,7 @@ ) from ._utils import ( NetRCInfo, + URLMatcher, enforce_http_url, get_environment_proxies, get_logger, @@ -471,8 +472,8 @@ def __init__( app=app, trust_env=trust_env, ) - self._proxies: typing.Dict[str, httpcore.SyncHTTPTransport] = { - key: self._init_proxy_transport( + self._proxies: typing.Dict[URLMatcher, httpcore.SyncHTTPTransport] = { + URLMatcher(key): self._init_proxy_transport( proxy, verify=verify, cert=cert, @@ -482,6 +483,7 @@ def __init__( ) for key, proxy in proxy_map.items() } + self._proxies = dict(sorted(self._proxies.items())) def _init_transport( self, @@ -539,21 +541,8 @@ def _transport_for_url(self, url: URL) -> httpcore.SyncHTTPTransport: enforce_http_url(url) if self._proxies and not should_not_be_proxied(url): - default_port = {"http": 80, "https": 443}[url.scheme] - is_default_port = url.port is None or url.port == default_port - port = url.port or default_port - hostname = f"{url.host}:{port}" - proxy_keys = ( - f"{url.scheme}://{hostname}", - f"{url.scheme}://{url.host}" if is_default_port else None, - f"all://{hostname}", - f"all://{url.host}" if 
is_default_port else None, - url.scheme, - "all", - ) - for proxy_key in proxy_keys: - if proxy_key and proxy_key in self._proxies: - transport = self._proxies[proxy_key] + for matcher, transport in self._proxies.items(): + if matcher.matches(url): return transport return self._transport @@ -1000,8 +989,8 @@ def __init__( app=app, trust_env=trust_env, ) - self._proxies: typing.Dict[str, httpcore.AsyncHTTPTransport] = { - key: self._init_proxy_transport( + self._proxies: typing.Dict[URLMatcher, httpcore.AsyncHTTPTransport] = { + URLMatcher(key): self._init_proxy_transport( proxy, verify=verify, cert=cert, @@ -1011,6 +1000,7 @@ def __init__( ) for key, proxy in proxy_map.items() } + self._proxies = dict(sorted(self._proxies.items())) def _init_transport( self, @@ -1068,21 +1058,8 @@ def _transport_for_url(self, url: URL) -> httpcore.AsyncHTTPTransport: enforce_http_url(url) if self._proxies and not should_not_be_proxied(url): - default_port = {"http": 80, "https": 443}[url.scheme] - is_default_port = url.port is None or url.port == default_port - port = url.port or default_port - hostname = f"{url.host}:{port}" - proxy_keys = ( - f"{url.scheme}://{hostname}", - f"{url.scheme}://{url.host}" if is_default_port else None, - f"all://{hostname}", - f"all://{url.host}" if is_default_port else None, - url.scheme, - "all", - ) - for proxy_key in proxy_keys: - if proxy_key and proxy_key in self._proxies: - transport = self._proxies[proxy_key] + for matcher, transport in self._proxies.items(): + if matcher.matches(url): return transport return self._transport diff --git a/httpx/_utils.py b/httpx/_utils.py index 9790858647..d5d84d59ff 100644 --- a/httpx/_utils.py +++ b/httpx/_utils.py @@ -458,6 +458,9 @@ def priority(self) -> tuple: scheme_priority = -len(self.scheme) return (port_priority, host_priority, scheme_priority) + def __hash__(self) -> int: + return hash(self.pattern) + def __lt__(self, other: "URLMatcher") -> bool: return self.priority < other.priority diff --git 
a/tests/client/test_proxies.py b/tests/client/test_proxies.py index f5af90cc81..6120bc1d38 100644 --- a/tests/client/test_proxies.py +++ b/tests/client/test_proxies.py @@ -2,6 +2,7 @@ import pytest import httpx +from httpx._utils import URLMatcher def url_to_origin(url: str): @@ -36,8 +37,9 @@ def test_proxies_parameter(proxies, expected_proxies): client = httpx.AsyncClient(proxies=proxies) for proxy_key, url in expected_proxies: - assert proxy_key in client._proxies - proxy = client._proxies[proxy_key] + matcher = URLMatcher(proxy_key) + assert matcher in client._proxies + proxy = client._proxies[matcher] assert isinstance(proxy, httpcore.AsyncHTTPProxy) assert proxy.proxy_origin == url_to_origin(url) @@ -54,15 +56,15 @@ def test_proxies_parameter(proxies, expected_proxies): ("http://example.com", {}, None), ("http://example.com", {"https": PROXY_URL}, None), ("http://example.com", {"http://example.net": PROXY_URL}, None), - ("http://example.com:443", {"http://example.com": PROXY_URL}, None), + ("http://example.com:443", {"http://example.com": PROXY_URL}, PROXY_URL), ("http://example.com", {"all": PROXY_URL}, PROXY_URL), ("http://example.com", {"http": PROXY_URL}, PROXY_URL), ("http://example.com", {"all://example.com": PROXY_URL}, PROXY_URL), - ("http://example.com", {"all://example.com:80": PROXY_URL}, PROXY_URL), + ("http://example.com", {"all://example.com:80": PROXY_URL}, None), ("http://example.com", {"http://example.com": PROXY_URL}, PROXY_URL), - ("http://example.com", {"http://example.com:80": PROXY_URL}, PROXY_URL), + ("http://example.com", {"http://example.com:80": PROXY_URL}, None), ("http://example.com:8080", {"http://example.com:8080": PROXY_URL}, PROXY_URL), - ("http://example.com:8080", {"http://example.com": PROXY_URL}, None), + ("http://example.com:8080", {"http://example.com": PROXY_URL}, PROXY_URL), ( "http://example.com", { From b549492b7abbf65bb31319c2974314f84cb930fc Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 29 Jul 2020 13:29:42 
+0100 Subject: [PATCH 03/11] Docstring --- httpx/_utils.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/httpx/_utils.py b/httpx/_utils.py index d5d84d59ff..5ff83c59ec 100644 --- a/httpx/_utils.py +++ b/httpx/_utils.py @@ -426,6 +426,47 @@ def elapsed(self) -> timedelta: class URLMatcher: + """ + A utility class currently used for making lookups against proxy keys... + + # Wildcard matching... + >>> pattern = URLMatcher("all") + >>> pattern.matches(httpx.URL("http://example.com")) + True + + # With scheme matching... + >>> pattern = URLMatcher("https") + >>> pattern.matches(httpx.URL("https://example.com")) + True + >>> pattern.matches(httpx.URL("http://example.com")) + False + + # With domain matching... + >>> pattern = URLMatcher("https://example.com") + >>> pattern.matches(httpx.URL("https://example.com")) + True + >>> pattern.matches(httpx.URL("http://example.com")) + False + >>> pattern.matches(httpx.URL("https://other.com")) + False + + # Wildcard scheme, with domain matching... + >>> pattern = URLMatcher("all://example.com") + >>> pattern.matches(httpx.URL("https://example.com")) + True + >>> pattern.matches(httpx.URL("http://example.com")) + True + >>> pattern.matches(httpx.URL("https://other.com")) + False + + # With port matching... 
+ >>> pattern = URLMatcher("https://example.com:1234") + >>> pattern.matches(httpx.URL("https://example.com:1234")) + True + >>> pattern.matches(httpx.URL("https://example.com")) + False + """ + def __init__(self, pattern: str) -> None: from ._models import URL From 18adaca9bd42403f45386fe53fe032f7f64e5ef8 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 29 Jul 2020 13:29:49 +0100 Subject: [PATCH 04/11] Pin pytest --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 42607063ce..cc05742a8b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ flake8-bugbear flake8-pie isort==5.* mypy -pytest +pytest==5.* pytest-asyncio pytest-trio pytest-cov From 07b61ea7f0dd92a517d2cc1b8b9c75797220ca3e Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 29 Jul 2020 15:32:11 +0100 Subject: [PATCH 05/11] Add support for no-proxies configurations --- httpx/_client.py | 36 +++++++++++++++++++++++------------- httpx/_types.py | 2 +- tests/client/test_proxies.py | 1 + 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/httpx/_client.py b/httpx/_client.py index dca8718fcf..4a7fc030a0 100644 --- a/httpx/_client.py +++ b/httpx/_client.py @@ -86,7 +86,7 @@ def __init__( def _get_proxy_map( self, proxies: typing.Optional[ProxiesTypes], trust_env: bool, - ) -> typing.Dict[str, Proxy]: + ) -> typing.Dict[str, typing.Optional[Proxy]]: if proxies is None: if trust_env: return { @@ -94,15 +94,15 @@ def _get_proxy_map( for key, url in get_environment_proxies().items() } return {} - elif isinstance(proxies, (str, URL, Proxy)): - proxy = Proxy(url=proxies) if isinstance(proxies, (str, URL)) else proxies - return {"all": proxy} - else: + if isinstance(proxies, dict): new_proxies = {} for key, value in proxies.items(): proxy = Proxy(url=value) if isinstance(value, (str, URL)) else value new_proxies[str(key)] = proxy return new_proxies + else: + proxy = Proxy(url=proxies) if 
isinstance(proxies, (str, URL)) else proxies + return {"all": proxy} @property def headers(self) -> Headers: @@ -472,8 +472,12 @@ def __init__( app=app, trust_env=trust_env, ) - self._proxies: typing.Dict[URLMatcher, httpcore.SyncHTTPTransport] = { - URLMatcher(key): self._init_proxy_transport( + self._proxies: typing.Dict[ + URLMatcher, typing.Optional[httpcore.SyncHTTPTransport] + ] = { + URLMatcher(key): None + if proxy is None + else self._init_proxy_transport( proxy, verify=verify, cert=cert, @@ -543,7 +547,7 @@ def _transport_for_url(self, url: URL) -> httpcore.SyncHTTPTransport: if self._proxies and not should_not_be_proxied(url): for matcher, transport in self._proxies.items(): if matcher.matches(url): - return transport + return self._transport if transport is None else transport return self._transport @@ -885,7 +889,8 @@ def delete( def close(self) -> None: self._transport.close() for proxy in self._proxies.values(): - proxy.close() + if proxy is not None: + proxy.close() def __enter__(self) -> "Client": return self @@ -989,8 +994,12 @@ def __init__( app=app, trust_env=trust_env, ) - self._proxies: typing.Dict[URLMatcher, httpcore.AsyncHTTPTransport] = { - URLMatcher(key): self._init_proxy_transport( + self._proxies: typing.Dict[ + URLMatcher, typing.Optional[httpcore.AsyncHTTPTransport] + ] = { + URLMatcher(key): None + if proxy is None + else self._init_proxy_transport( proxy, verify=verify, cert=cert, @@ -1060,7 +1069,7 @@ def _transport_for_url(self, url: URL) -> httpcore.AsyncHTTPTransport: if self._proxies and not should_not_be_proxied(url): for matcher, transport in self._proxies.items(): if matcher.matches(url): - return transport + return self._transport if transport is None else transport return self._transport @@ -1402,7 +1411,8 @@ async def delete( async def aclose(self) -> None: await self._transport.aclose() for proxy in self._proxies.values(): - await proxy.aclose() + if proxy is not None: + await proxy.aclose() async def __aenter__(self) 
-> "AsyncClient": return self diff --git a/httpx/_types.py b/httpx/_types.py index bfce5650da..89a90a2cf5 100644 --- a/httpx/_types.py +++ b/httpx/_types.py @@ -54,7 +54,7 @@ Tuple[Optional[float], Optional[float], Optional[float], Optional[float]], "Timeout", ] -ProxiesTypes = Union[URLTypes, "Proxy", Dict[URLTypes, Union[URLTypes, "Proxy"]]] +ProxiesTypes = Union[URLTypes, "Proxy", Dict[URLTypes, Union[None, URLTypes, "Proxy"]]] AuthTypes = Union[ Tuple[Union[str, bytes], Union[str, bytes]], diff --git a/tests/client/test_proxies.py b/tests/client/test_proxies.py index 6120bc1d38..d361372c1f 100644 --- a/tests/client/test_proxies.py +++ b/tests/client/test_proxies.py @@ -58,6 +58,7 @@ def test_proxies_parameter(proxies, expected_proxies): ("http://example.com", {"http://example.net": PROXY_URL}, None), ("http://example.com:443", {"http://example.com": PROXY_URL}, PROXY_URL), ("http://example.com", {"all": PROXY_URL}, PROXY_URL), + ("http://example.com", {"all": PROXY_URL, "http://example.com": None}, None), ("http://example.com", {"http": PROXY_URL}, PROXY_URL), ("http://example.com", {"all://example.com": PROXY_URL}, PROXY_URL), ("http://example.com", {"all://example.com:80": PROXY_URL}, None), From 3f52b0b7c95b50f499c16196d4d65c0a8a470aa3 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Thu, 30 Jul 2020 15:13:45 +0100 Subject: [PATCH 06/11] Don't call should_not_proxy on each request --- httpx/_client.py | 17 +++----- httpx/_utils.py | 84 ++++++++++++++++++++++-------------- tests/client/test_proxies.py | 14 ++++++ tests/test_utils.py | 44 ++++++++++++------- 4 files changed, 101 insertions(+), 58 deletions(-) diff --git a/httpx/_client.py b/httpx/_client.py index 4a7fc030a0..246defcba1 100644 --- a/httpx/_client.py +++ b/httpx/_client.py @@ -49,7 +49,6 @@ get_environment_proxies, get_logger, same_origin, - should_not_be_proxied, ) logger = get_logger(__name__) @@ -90,7 +89,7 @@ def _get_proxy_map( if proxies is None: if trust_env: return { - key: 
Proxy(url=url) + key: None if url is None else Proxy(url=url) for key, url in get_environment_proxies().items() } return {} @@ -544,10 +543,9 @@ def _transport_for_url(self, url: URL) -> httpcore.SyncHTTPTransport: """ enforce_http_url(url) - if self._proxies and not should_not_be_proxied(url): - for matcher, transport in self._proxies.items(): - if matcher.matches(url): - return self._transport if transport is None else transport + for matcher, transport in self._proxies.items(): + if matcher.matches(url): + return self._transport if transport is None else transport return self._transport @@ -1066,10 +1064,9 @@ def _transport_for_url(self, url: URL) -> httpcore.AsyncHTTPTransport: """ enforce_http_url(url) - if self._proxies and not should_not_be_proxied(url): - for matcher, transport in self._proxies.items(): - if matcher.matches(url): - return self._transport if transport is None else transport + for matcher, transport in self._proxies.items(): + if matcher.matches(url): + return self._transport if transport is None else transport return self._transport diff --git a/httpx/_utils.py b/httpx/_utils.py index 5ff83c59ec..c56fbfede4 100644 --- a/httpx/_utils.py +++ b/httpx/_utils.py @@ -290,42 +290,38 @@ def same_origin(url: "URL", other: "URL") -> bool: ) -def should_not_be_proxied(url: "URL") -> bool: - """ - Return True if url should not be proxied, - return False otherwise. 
- """ - no_proxy = getproxies().get("no") - if not no_proxy: - return False - no_proxy_list = [host.strip() for host in no_proxy.split(",")] - for name in no_proxy_list: - if name == "*": - return True - if name: - name = name.lstrip(".") # ignore leading dots - name = re.escape(name) - pattern = r"(.+\.)?%s$" % name - if re.match(pattern, url.host, re.I) or re.match( - pattern, url.authority, re.I - ): - return True - return False - - -def get_environment_proxies() -> typing.Dict[str, str]: +def get_environment_proxies() -> typing.Dict[str, typing.Optional[str]]: """Gets proxy information from the environment""" # urllib.request.getproxies() falls back on System # Registry and Config for proxies on Windows and macOS. # We don't want to propagate non-HTTP proxies into # our configuration such as 'TRAVIS_APT_PROXY'. - supported_proxy_schemes = ("http", "https", "all") - return { - key: val - for key, val in getproxies().items() - if ("://" in key or key in supported_proxy_schemes) - } + proxy_info = getproxies() + mounts: typing.Dict[str, typing.Optional[str]] = {} + + for scheme in ("http", "https", "all"): + if proxy_info.get(scheme): + mounts[scheme] = proxy_info[scheme] + + no_proxy_hosts = [host.strip() for host in proxy_info.get("no", "").split(",")] + for hostname in no_proxy_hosts: + # See https://curl.haxx.se/libcurl/c/CURLOPT_NOPROXY.html for details + # on how names in `NO_PROXY` are handled. + if hostname == "*": + # If NO_PROXY=* is used or if "*" occurs as any one of the comma + # seperated hostnames, then we should just bypass any information + # from HTTP_PROXY, HTTPS_PROXY, ALL_PROXY, and always ignore + # proxies. + return {} + elif hostname: + # NO_PROXY=.google.com is marked as "all://*.google.com, + # which disables "www.google.com" but not "google.com" + # NO_PROXY=google.com is marked as "all://*.google.com, + # which disables "www.google.com" and "google.com". 
+ mounts[f"all://*{hostname}"] = None + + return mounts def to_bytes(value: typing.Union[str, bytes], encoding: str = "utf-8") -> bytes: @@ -476,13 +472,32 @@ def __init__(self, pattern: str) -> None: url = URL(pattern) self.pattern = pattern self.scheme = "" if url.scheme == "all" else url.scheme - self.host = url.host + self.host = "" if url.host == "*" else url.host self.port = url.port + if not url.host or url.host == "*": + self.host_regex: typing.Optional[typing.Pattern[str]] = None + else: + if url.host.startswith("*."): + # *.example.com should match "www.example.com", but not "example.com" + domain = re.escape(url.host[2:]) + self.host_regex = re.compile(f"^.+\\.{domain}$") + elif url.host.startswith("*"): + # *example.com should match "www.example.com" and "example.com" + domain = re.escape(url.host[1:]) + self.host_regex = re.compile(f"^(.+\\.)?{domain}$") + else: + # example.com should match "example.com" but not "www.example.com" + domain = re.escape(url.host) + self.host_regex = re.compile(f"^{domain}$") def matches(self, other: "URL") -> bool: if self.scheme and self.scheme != other.scheme: return False - if self.host and self.host != other.host: + if ( + self.host + and self.host_regex is not None + and not self.host_regex.match(other.host) + ): return False if self.port is not None and self.port != other.port: return False @@ -494,8 +509,11 @@ def priority(self) -> tuple: The priority allows URLMatcher instances to be sortable, so that if we can match from most specific to least specific. """ - port_priority = -1 if self.port is not None else 0 + # URLs with a port should take priority over URLs without a port. + port_priority = 0 if self.port is not None else 1 + # Longer hostnames should match first. host_priority = -len(self.host) + # Longer schemes should match first. 
scheme_priority = -len(self.scheme) return (port_priority, host_priority, scheme_priority) diff --git a/tests/client/test_proxies.py b/tests/client/test_proxies.py index d361372c1f..3eb884ca6e 100644 --- a/tests/client/test_proxies.py +++ b/tests/client/test_proxies.py @@ -56,6 +56,20 @@ def test_proxies_parameter(proxies, expected_proxies): ("http://example.com", {}, None), ("http://example.com", {"https": PROXY_URL}, None), ("http://example.com", {"http://example.net": PROXY_URL}, None), + # Using "*" should match any domain name. + ("http://example.com", {"http://*": PROXY_URL}, PROXY_URL), + ("https://example.com", {"http://*": PROXY_URL}, None), + # Using "example.com" should match example.com, but not www.example.com + ("http://example.com", {"http://example.com": PROXY_URL}, PROXY_URL), + ("http://www.example.com", {"http://example.com": PROXY_URL}, None), + # Using "*.example.com" should match www.example.com, but not example.com + ("http://example.com", {"http://*.example.com": PROXY_URL}, None), + ("http://www.example.com", {"http://*.example.com": PROXY_URL}, PROXY_URL), + # Using "*example.com" should match example.com and www.example.com + ("http://example.com", {"http://*example.com": PROXY_URL}, PROXY_URL), + ("http://www.example.com", {"http://*example.com": PROXY_URL}, PROXY_URL), + ("http://wwwexample.com", {"http://*example.com": PROXY_URL}, None), + # ... 
("http://example.com:443", {"http://example.com": PROXY_URL}, PROXY_URL), ("http://example.com", {"all": PROXY_URL}, PROXY_URL), ("http://example.com", {"all": PROXY_URL, "http://example.com": None}, None), diff --git a/tests/test_utils.py b/tests/test_utils.py index 88fb1000bb..81090cc9c1 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -15,7 +15,6 @@ obfuscate_sensitive_headers, parse_header_links, same_origin, - should_not_be_proxied, ) from tests.utils import override_log_level @@ -228,75 +227,90 @@ def test_obfuscate_sensitive_headers(headers, output): [ ( "http://127.0.0.1", - {"NO_PROXY": ""}, + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": ""}, False, ), # everything proxied when no_proxy is empty/unset ( "http://127.0.0.1", - {"NO_PROXY": "127.0.0.1"}, + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "127.0.0.1"}, True, ), # no_proxy as ip case is matched ( "http://127.0.0.1", - {"NO_PROXY": "https://127.0.0.1"}, + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "https://127.0.0.1"}, False, ), # no_proxy with scheme is ignored ( "http://127.0.0.1", - {"NO_PROXY": "1.1.1.1"}, + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "1.1.1.1"}, False, ), # different no_proxy means its proxied ( "http://courses.mit.edu", - {"NO_PROXY": "mit.edu"}, + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu"}, True, ), # no_proxy for sub-domain matches ( "https://mit.edu.info", - {"NO_PROXY": "mit.edu"}, + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu"}, False, ), # domain is actually edu.info, so should be proxied ( "https://mit.edu.info", - {"NO_PROXY": "mit.edu,edu.info"}, + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu,edu.info"}, True, ), # list in no_proxy, matches second domain ( "https://mit.edu.info", - {"NO_PROXY": "mit.edu, edu.info"}, + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu, edu.info"}, True, ), # list with spaces in no_proxy ( "https://mit.edu.info", - {"NO_PROXY": 
"mit.edu,mit.info"}, + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu,mit.info"}, False, ), # list in no_proxy, without any domain matching ( "https://foo.example.com", - {"NO_PROXY": "www.example.com"}, + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "www.example.com"}, False, ), # different subdomains foo vs www means we still proxy ( "https://www.example1.com", - {"NO_PROXY": ".example1.com"}, + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": ".example1.com"}, True, ), # no_proxy starting with dot ( "https://www.example2.com", - {"NO_PROXY": "ample2.com"}, + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "ample2.com"}, False, ), # whole-domain matching ( "https://www.example3.com", - {"NO_PROXY": "*"}, + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "*"}, True, ), # wildcard * means nothing proxied ], ) def test_should_not_be_proxied(url, no_proxy, expected): os.environ.update(no_proxy) + proxies = { + URLMatcher(key): value for key, value in get_environment_proxies().items() + } parsed_url = httpx.URL(url) - assert should_not_be_proxied(parsed_url) == expected + + should_not_proxy = True + print(get_environment_proxies()) + print(url) + for matcher, value in sorted(proxies.items()): + if matcher.matches(parsed_url): + print("yup", matcher.pattern, value) + should_not_proxy = value is None + break + else: + print("nope", matcher.pattern) + + assert should_not_proxy == expected def test_same_origin(): From 4906019ea9d0314a47da0055921948c8d594f4b3 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Fri, 31 Jul 2020 10:26:44 +0100 Subject: [PATCH 07/11] Drop print statements --- tests/test_utils.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 81090cc9c1..fcbec55a30 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -300,15 +300,10 @@ def test_should_not_be_proxied(url, no_proxy, expected): parsed_url = httpx.URL(url) should_not_proxy = True - print(get_environment_proxies()) 
- print(url) for matcher, value in sorted(proxies.items()): if matcher.matches(parsed_url): - print("yup", matcher.pattern, value) should_not_proxy = value is None break - else: - print("nope", matcher.pattern) assert should_not_proxy == expected From 20ceb3739f4db4ee4bbeda2d34f38140dfc3f1bc Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Fri, 31 Jul 2020 10:27:58 +0100 Subject: [PATCH 08/11] Tweak comment --- httpx/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/httpx/_utils.py b/httpx/_utils.py index cbf7217a53..b039cddeb8 100644 --- a/httpx/_utils.py +++ b/httpx/_utils.py @@ -321,7 +321,7 @@ def get_environment_proxies() -> typing.Dict[str, typing.Optional[str]]: elif hostname: # NO_PROXY=.google.com is marked as "all://*.google.com, # which disables "www.google.com" but not "google.com" - # NO_PROXY=google.com is marked as "all://*.google.com, + # NO_PROXY=google.com is marked as "all://*google.com, # which disables "www.google.com" and "google.com". mounts[f"all://*{hostname}"] = None From 2008ce79ee30251097dcf5727bb6d53ae1795008 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Fri, 31 Jul 2020 10:29:03 +0100 Subject: [PATCH 09/11] Tweak comment on domain wildcards --- httpx/_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/httpx/_utils.py b/httpx/_utils.py index b039cddeb8..c6f21c5ce7 100644 --- a/httpx/_utils.py +++ b/httpx/_utils.py @@ -323,6 +323,7 @@ def get_environment_proxies() -> typing.Dict[str, typing.Optional[str]]: # which disables "www.google.com" but not "google.com" # NO_PROXY=google.com is marked as "all://*google.com, # which disables "www.google.com" and "google.com". 
+ # (But not "wwwgoogle.com") mounts[f"all://*{hostname}"] = None return mounts From 544c41108c9fff6ed2f7398a56a81c2a7daf33c0 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Sat, 1 Aug 2020 20:14:19 +0100 Subject: [PATCH 10/11] Update httpx/_utils.py Co-authored-by: Florimond Manca --- httpx/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/httpx/_utils.py b/httpx/_utils.py index 665ea5b1af..9eec1c3f91 100644 --- a/httpx/_utils.py +++ b/httpx/_utils.py @@ -517,7 +517,7 @@ def matches(self, other: "URL") -> bool: def priority(self) -> tuple: """ The priority allows URLMatcher instances to be sortable, so that - if we can match from most specific to least specific. + we can match from most specific to least specific. """ # URLs with a port should take priority over URLs without a port. port_priority = 0 if self.port is not None else 1 From 34f92881feef27ebae7c92a8479613d05f1aeabf Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Sun, 2 Aug 2020 09:38:40 +0100 Subject: [PATCH 11/11] Pull test_should_not_be_proxied cases into test_proxies_environ --- tests/client/test_proxies.py | 79 +++++++++++++++++++++++++++++++++ tests/test_utils.py | 86 ------------------------------------ 2 files changed, 79 insertions(+), 86 deletions(-) diff --git a/tests/client/test_proxies.py b/tests/client/test_proxies.py index c4cf527118..cffa75d5d6 100644 --- a/tests/client/test_proxies.py +++ b/tests/client/test_proxies.py @@ -148,6 +148,85 @@ def test_unsupported_proxy_scheme(): {"HTTP_PROXY": "http://example.com", "NO_PROXY": "google.com"}, None, ), + # Everything proxied when NO_PROXY is empty/unset + ( + "http://127.0.0.1", + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": ""}, + "http://localhost:123", + ), + # Not proxied if NO_PROXY matches URL. + ( + "http://127.0.0.1", + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "127.0.0.1"}, + None, + ), + # Proxied if NO_PROXY scheme does not match URL. 
+ ( + "http://127.0.0.1", + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "https://127.0.0.1"}, + "http://localhost:123", + ), + # Proxied if NO_PROXY scheme does not match host. + ( + "http://127.0.0.1", + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "1.1.1.1"}, + "http://localhost:123", + ), + # Not proxied if NO_PROXY matches host domain suffix. + ( + "http://courses.mit.edu", + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu"}, + None, + ), + # Proxied even though NO_PROXY matches host domain *prefix*. + ( + "https://mit.edu.info", + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu"}, + "http://localhost:123", + ), + # Not proxied if one item in NO_PROXY case matches host domain suffix. + ( + "https://mit.edu.info", + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu,edu.info"}, + None, + ), + # Not proxied if one item in NO_PROXY case matches host domain suffix. + # May include whitespace. + ( + "https://mit.edu.info", + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu, edu.info"}, + None, + ), + # Proxied if no items in NO_PROXY match. + ( + "https://mit.edu.info", + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu,mit.info"}, + "http://localhost:123", + ), + # Proxied if NO_PROXY domain doesn't match. + ( + "https://foo.example.com", + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "www.example.com"}, + "http://localhost:123", + ), + # Not proxied for subdomains matching NO_PROXY, with a leading ".". + ( + "https://www.example1.com", + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": ".example1.com"}, + None, + ), + # Proxied, because NO_PROXY subdomains only match if "." seperated. + ( + "https://www.example2.com", + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "ample2.com"}, + "http://localhost:123", + ), + # No requests are proxied if NO_PROXY="*" is set. 
+ ( + "https://www.example3.com", + {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "*"}, + None, + ), ], ) @pytest.mark.parametrize("client_class", [httpx.Client, httpx.AsyncClient]) diff --git a/tests/test_utils.py b/tests/test_utils.py index 4b892aa137..d564ceb75a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -222,92 +222,6 @@ def test_obfuscate_sensitive_headers(headers, output): assert list(obfuscate_sensitive_headers(bytes_headers)) == bytes_output -@pytest.mark.parametrize( - ["url", "no_proxy", "expected"], - [ - ( - "http://127.0.0.1", - {"ALL_PROXY": "http://localhost:123", "NO_PROXY": ""}, - False, - ), # everything proxied when no_proxy is empty/unset - ( - "http://127.0.0.1", - {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "127.0.0.1"}, - True, - ), # no_proxy as ip case is matched - ( - "http://127.0.0.1", - {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "https://127.0.0.1"}, - False, - ), # no_proxy with scheme is ignored - ( - "http://127.0.0.1", - {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "1.1.1.1"}, - False, - ), # different no_proxy means its proxied - ( - "http://courses.mit.edu", - {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu"}, - True, - ), # no_proxy for sub-domain matches - ( - "https://mit.edu.info", - {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu"}, - False, - ), # domain is actually edu.info, so should be proxied - ( - "https://mit.edu.info", - {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu,edu.info"}, - True, - ), # list in no_proxy, matches second domain - ( - "https://mit.edu.info", - {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu, edu.info"}, - True, - ), # list with spaces in no_proxy - ( - "https://mit.edu.info", - {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu,mit.info"}, - False, - ), # list in no_proxy, without any domain matching - ( - "https://foo.example.com", - {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "www.example.com"}, - 
False, - ), # different subdomains foo vs www means we still proxy - ( - "https://www.example1.com", - {"ALL_PROXY": "http://localhost:123", "NO_PROXY": ".example1.com"}, - True, - ), # no_proxy starting with dot - ( - "https://www.example2.com", - {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "ample2.com"}, - False, - ), # whole-domain matching - ( - "https://www.example3.com", - {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "*"}, - True, - ), # wildcard * means nothing proxied - ], -) -def test_should_not_be_proxied(url, no_proxy, expected): - os.environ.update(no_proxy) - proxies = { - URLPattern(key): value for key, value in get_environment_proxies().items() - } - parsed_url = httpx.URL(url) - - should_not_proxy = True - for matcher, value in sorted(proxies.items()): - if matcher.matches(parsed_url): - should_not_proxy = value is None - break - - assert should_not_proxy == expected - - def test_same_origin(): origin1 = httpx.URL("https://example.com") origin2 = httpx.URL("HTTPS://EXAMPLE.COM:443")