From 88b8c9758e03c2f9924b6158d52016a8b88418e4 Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Wed, 25 Mar 2026 10:17:02 -0700 Subject: [PATCH] fix(sandbox): strip query string from L7 path matching Previously, L7 policy path rules received the full request URI including the query string. This caused exact path rules like `path: /api/v1/download` to silently fail when clients added query parameters (e.g., `/api/v1/download?slug=foo`), because Rego's glob.match treats the query string as part of the last path segment. This fix splits the request target into path and query components during HTTP parsing. Path rules now match only the path component, and query strings are passed through transparently to the upstream server. This matches user expectations: a path rule controls which endpoints are reachable, not which query parameters are allowed. Changes: - Add `query` field to L7Request and L7RequestInfo structs - Split path/query in parse_http_request before L7 policy evaluation - Pass query string to Rego input for future query param filtering - Add l7_query field to L7_REQUEST log output - Add tests for query string splitting and path matching with query params - Document path matching behavior in policy-schema.md Refs: NVIDIA/OpenShell/discussions/607 --- crates/openshell-sandbox/src/l7/mod.rs | 4 +- crates/openshell-sandbox/src/l7/provider.rs | 6 +- crates/openshell-sandbox/src/l7/relay.rs | 3 + crates/openshell-sandbox/src/l7/rest.rs | 82 +++++++++++++++++++-- crates/openshell-sandbox/src/opa.rs | 63 +++++++++++++++- docs/reference/policy-schema.md | 6 ++ 6 files changed, 156 insertions(+), 8 deletions(-) diff --git a/crates/openshell-sandbox/src/l7/mod.rs b/crates/openshell-sandbox/src/l7/mod.rs index 09e54788..089455dc 100644 --- a/crates/openshell-sandbox/src/l7/mod.rs +++ b/crates/openshell-sandbox/src/l7/mod.rs @@ -74,8 +74,10 @@ pub struct L7Decision { pub struct L7RequestInfo { /// Protocol action: HTTP method (GET, POST, ...) or SQL command (SELECT, INSERT, ...). pub action: String, - /// Target: URL path for REST, or empty for SQL. + /// Target: URL path for REST (without query string), or empty for SQL. pub target: String, + /// Query string from the request URI (after `?`), empty if absent. + pub query: String, } /// Parse an L7 endpoint config from a regorus Value (returned by Rego query). diff --git a/crates/openshell-sandbox/src/l7/provider.rs b/crates/openshell-sandbox/src/l7/provider.rs index a9bf8bf5..71be9313 100644 --- a/crates/openshell-sandbox/src/l7/provider.rs +++ b/crates/openshell-sandbox/src/l7/provider.rs @@ -29,8 +29,12 @@ pub enum BodyLength { pub struct L7Request { /// Protocol action: HTTP method or SQL command. pub action: String, - /// Target: URL path for REST, empty for SQL. + /// Target: URL path for REST (without query string), empty for SQL. pub target: String, + /// Query string from the request URI (after `?`), empty if absent. + /// Kept separate from `target` so path-based policy matching is not + /// affected by the presence or content of query parameters. + pub query: String, /// Raw request header bytes (request line + headers for HTTP, message for SQL). /// May include overflow body bytes read during header parsing. pub raw_header: Vec, diff --git a/crates/openshell-sandbox/src/l7/relay.rs b/crates/openshell-sandbox/src/l7/relay.rs index 940e7f94..dbaf45ff 100644 --- a/crates/openshell-sandbox/src/l7/relay.rs +++ b/crates/openshell-sandbox/src/l7/relay.rs @@ -108,6 +108,7 @@ where let request_info = L7RequestInfo { action: req.action.clone(), target: req.target.clone(), + query: req.query.clone(), }; // Evaluate L7 policy via Rego @@ -127,6 +128,7 @@ where l7_protocol = "rest", l7_action = %request_info.action, l7_target = %request_info.target, + l7_query = %request_info.query, l7_decision = decision_str, l7_deny_reason = %reason, "L7_REQUEST", @@ -198,6 +200,7 @@ fn evaluate_l7_request( "request": { "method": request.action, "path": request.target, + "query": request.query, } }); diff --git a/crates/openshell-sandbox/src/l7/rest.rs b/crates/openshell-sandbox/src/l7/rest.rs index ebb34957..61735aa2 100644 --- a/crates/openshell-sandbox/src/l7/rest.rs +++ b/crates/openshell-sandbox/src/l7/rest.rs @@ -116,10 +116,16 @@ async fn parse_http_request(client: &mut C) -> Result(client: &mut C) -> Result serde_json::Value { + l7_input_with_query(host, port, method, path, "") + } + + fn l7_input_with_query( + host: &str, + port: u16, + method: &str, + path: &str, + query: &str, + ) -> serde_json::Value { serde_json::json!({ "network": { "host": host, "port": port }, "exec": { @@ -1368,7 +1378,8 @@ process: }, "request": { "method": method, - "path": path + "path": path, + "query": query } }) } @@ -1472,6 +1483,56 @@ process: assert!(eval_l7(&engine, &input)); } + /// Query strings are now stripped from the path before policy evaluation. + /// This test verifies that an exact path rule matches requests with query params. + #[test] + fn l7_path_with_query_string_matches_exact_path() { + let engine = l7_engine(); + // Rule: path "/repos/*/issues" should match even with query params + let input = l7_input_with_query( + "api.example.com", + 8080, + "POST", + "/repos/org/issues", + "page=2&per_page=50", + ); + assert!( + eval_l7(&engine, &input), + "exact path should match regardless of query string" + ); + } + + /// Query strings don't affect glob path matching. + #[test] + fn l7_path_glob_with_query_string() { + let engine = l7_engine(); + // Rule: path "/repos/**" should match with query params + let input = l7_input_with_query( + "api.example.com", + 8080, + "GET", + "/repos/org/repo/pulls", + "state=open", + ); + assert!( + eval_l7(&engine, &input), + "glob path should match regardless of query string" + ); + } + + /// Path that doesn't match the rule should still be denied even with query string. + #[test] + fn l7_wrong_path_with_query_string_denied() { + let engine = l7_engine(); + // Rule only allows /repos/**, not /admin/** + let input = + l7_input_with_query("api.example.com", 8080, "GET", "/admin/settings", "foo=bar"); + assert!( + !eval_l7(&engine, &input), + "non-matching path should still be denied" + ); + } + #[test] fn l7_no_request_on_l4_only_endpoint() { // L4-only endpoint should not match L7 allow_request diff --git a/docs/reference/policy-schema.md b/docs/reference/policy-schema.md index cb37d0ba..920506d3 100644 --- a/docs/reference/policy-schema.md +++ b/docs/reference/policy-schema.md @@ -184,6 +184,12 @@ Used when `access` is not set. Each rule explicitly allows a method and path com | `allow.method` | string | Yes | HTTP method to allow (for example, `GET`, `POST`). | | `allow.path` | string | Yes | URL path pattern. Supports `*` and `**` glob syntax. | +**Path matching behavior:** + +- Path rules match only the path component of the request URI (everything before `?`). +- Query strings are not evaluated by path rules. A rule with `path: /api/v1/download` matches both `/api/v1/download` and `/api/v1/download?slug=my-skill&version=1.0`. +- Glob patterns use `/` as the segment delimiter. `*` matches within a single segment, `**` matches across segments. + Example with rules: ```yaml