From bce18ce478082be41203c7a178c6f72662fe560e Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Wed, 25 Mar 2026 16:03:26 -0700 Subject: [PATCH 1/2] feat(sandbox): add L7 query parameter matchers Signed-off-by: John Myers <9696606+johntmyers@users.noreply.github.com> --- architecture/sandbox.md | 6 +- architecture/security-policy.md | 8 +- crates/openshell-policy/src/lib.rs | 94 ++++++- .../data/sandbox-policy.rego | 50 ++++ crates/openshell-sandbox/src/l7/mod.rs | 178 ++++++++++++- crates/openshell-sandbox/src/l7/provider.rs | 3 + crates/openshell-sandbox/src/l7/relay.rs | 3 + crates/openshell-sandbox/src/l7/rest.rs | 136 +++++++++- .../src/mechanistic_mapper.rs | 1 + crates/openshell-sandbox/src/opa.rs | 209 ++++++++++++++- docs/reference/policy-schema.md | 6 + docs/sandboxes/policies.md | 26 ++ e2e/python/test_sandbox_policy.py | 246 ++++++++++++++++++ proto/sandbox.proto | 12 + 14 files changed, 959 insertions(+), 19 deletions(-) diff --git a/architecture/sandbox.md b/architecture/sandbox.md index 1117d0f7..0e0114d0 100644 --- a/architecture/sandbox.md +++ b/architecture/sandbox.md @@ -962,7 +962,7 @@ flowchart LR | `EnforcementMode` | `Audit`, `Enforce` | What to do on L7 deny (log-only vs block) | | `L7EndpointConfig` | `{ protocol, tls, enforcement }` | Per-endpoint L7 configuration | | `L7Decision` | `{ allowed, reason, matched_rule }` | Result of L7 evaluation | -| `L7RequestInfo` | `{ action, target }` | HTTP method + path for policy evaluation | +| `L7RequestInfo` | `{ action, target, query_params }` | HTTP method, path, and decoded query multimap for policy evaluation | ### Access presets @@ -1041,7 +1041,7 @@ This enables credential injection on all HTTPS endpoints automatically, without Implements `L7Provider` for HTTP/1.1: -- **`parse_request()`**: Reads up to 16 KiB of headers, parses the request line (method, path), determines body framing from `Content-Length` or `Transfer-Encoding: chunked` headers. Returns `L7Request` with raw header bytes (may include overflow body bytes). +- **`parse_request()`**: Reads up to 16 KiB of headers, parses the request line (method, path), decodes query parameters into a multimap, determines body framing from `Content-Length` or `Transfer-Encoding: chunked` headers. Returns `L7Request` with raw header bytes (may include overflow body bytes). - **`relay()`**: Forwards request headers and body to upstream (handling Content-Length, chunked, and no-body cases), then reads and relays the full response back to the client. @@ -1054,7 +1054,7 @@ Implements `L7Provider` for HTTP/1.1: `relay_with_inspection()` in `crates/openshell-sandbox/src/l7/relay.rs` is the main relay loop: 1. Parse one HTTP request from client via the provider -2. Build L7 input JSON with `request.method`, `request.path`, plus the CONNECT-level context (host, port, binary, ancestors, cmdline) +2. Build L7 input JSON with `request.method`, `request.path`, `request.query_params`, plus the CONNECT-level context (host, port, binary, ancestors, cmdline) 3. Evaluate `data.openshell.sandbox.allow_request` and `data.openshell.sandbox.request_deny_reason` 4. Log the L7 decision (tagged `L7_REQUEST`) 5. If allowed (or audit mode): relay request to upstream and response back to client, then loop diff --git a/architecture/security-policy.md b/architecture/security-policy.md index 44898d70..989b9a68 100644 --- a/architecture/security-policy.md +++ b/architecture/security-policy.md @@ -461,9 +461,14 @@ rules: - allow: method: GET path: "/repos/**" + query: + per_page: "1*" - allow: method: POST path: "/repos/*/issues" + query: + labels: + any: ["bug*", "p1*"] ``` #### `L7Allow` @@ -473,8 +478,9 @@ rules: | `method` | `string` | HTTP method: `GET`, `HEAD`, `POST`, `PUT`, `DELETE`, `PATCH`, `OPTIONS`, or `*` (any). Case-insensitive matching. | | `path` | `string` | URL path glob pattern: `**` matches everything, otherwise `glob.match` with `/` delimiter. | | `command` | `string` | SQL command: `SELECT`, `INSERT`, `UPDATE`, `DELETE`, or `*` (any). Case-insensitive matching. For `protocol: sql` endpoints. | +| `query` | `map` | Optional REST query rules keyed by decoded query param name. Value is either a glob string (for example, `tag: "foo-*"`) or `{ any: ["foo-*", "bar-*"] }`. | -Method and command fields use `*` as wildcard for "any". Path patterns use `**` for "match everything" and standard glob patterns with `/` as a delimiter otherwise. See `sandbox-policy.rego` -- `method_matches()`, `path_matches()`, `command_matches()`. +Method and command fields use `*` as wildcard for "any". Path patterns use `**` for "match everything" and standard glob patterns with `/` as a delimiter otherwise. Query matching is case-sensitive and evaluates decoded values; when duplicate keys are present in the request, every value for that key must match the configured matcher. See `sandbox-policy.rego` -- `method_matches()`, `path_matches()`, `command_matches()`, `query_params_match()`. #### Access Presets diff --git a/crates/openshell-policy/src/lib.rs b/crates/openshell-policy/src/lib.rs index f1c15539..7adb4dfd 100644 --- a/crates/openshell-policy/src/lib.rs +++ b/crates/openshell-policy/src/lib.rs @@ -15,8 +15,8 @@ use std::path::Path; use miette::{IntoDiagnostic, Result, WrapErr}; use openshell_core::proto::{ - FilesystemPolicy, L7Allow, L7Rule, LandlockPolicy, NetworkBinary, NetworkEndpoint, - NetworkPolicyRule, ProcessPolicy, SandboxPolicy, + FilesystemPolicy, L7Allow, L7QueryMatcher, L7Rule, LandlockPolicy, NetworkBinary, + NetworkEndpoint, NetworkPolicyRule, ProcessPolicy, SandboxPolicy, }; use serde::{Deserialize, Serialize}; @@ -120,6 +120,22 @@ struct L7AllowDef { path: String, #[serde(default, skip_serializing_if = "String::is_empty")] command: String, + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + query: BTreeMap, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(untagged)] +enum QueryMatcherDef { + Glob(String), + Any(QueryAnyDef), +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +struct QueryAnyDef { + #[serde(default, skip_serializing_if = "Vec::is_empty")] + any: Vec, } #[derive(Debug, Serialize, Deserialize)] @@ -176,6 +192,23 @@ fn to_proto(raw: PolicyFile) -> SandboxPolicy { method: r.allow.method, path: r.allow.path, command: r.allow.command, + query: r + .allow + .query + .into_iter() + .map(|(key, matcher)| { + let proto = match matcher { + QueryMatcherDef::Glob(glob) => { + L7QueryMatcher { glob, any: vec![] } + } + QueryMatcherDef::Any(any) => L7QueryMatcher { + glob: String::new(), + any: any.any, + }, + }; + (key, proto) + }) + .collect(), }), }) .collect(), @@ -275,6 +308,20 @@ fn from_proto(policy: &SandboxPolicy) -> PolicyFile { method: a.method, path: a.path, command: a.command, + query: a + .query + .into_iter() + .map(|(key, matcher)| { + let yaml_matcher = if !matcher.any.is_empty() { + QueryMatcherDef::Any(QueryAnyDef { + any: matcher.any, + }) + } else { + QueryMatcherDef::Glob(matcher.glob) + }; + (key, yaml_matcher) + }) + .collect(), }, } }) @@ -754,6 +801,49 @@ network_policies: assert_eq!(rule.binaries[0].path, "/usr/bin/curl"); } + #[test] + fn parse_l7_query_matchers_and_round_trip() { + let yaml = r#" +version: 1 +network_policies: + query_test: + name: query_test + endpoints: + - host: api.example.com + port: 8080 + protocol: rest + rules: + - allow: + method: GET + path: /download + query: + slug: "my-*" + tag: + any: ["foo-*", "bar-*"] + binaries: + - path: /usr/bin/curl +"#; + let proto = parse_sandbox_policy(yaml).expect("parse failed"); + let allow = proto.network_policies["query_test"].endpoints[0].rules[0] + .allow + .as_ref() + .expect("allow"); + assert_eq!(allow.query["slug"].glob, "my-*"); + assert_eq!(allow.query["slug"].any, Vec::::new()); + assert_eq!(allow.query["tag"].any, vec!["foo-*", "bar-*"]); + assert!(allow.query["tag"].glob.is_empty()); + + let yaml_out = serialize_sandbox_policy(&proto).expect("serialize failed"); + let proto_round_trip = parse_sandbox_policy(&yaml_out).expect("re-parse failed"); + let allow_round_trip = proto_round_trip.network_policies["query_test"].endpoints[0].rules + [0] + .allow + .as_ref() + .expect("allow"); + assert_eq!(allow_round_trip.query["slug"].glob, "my-*"); + assert_eq!(allow_round_trip.query["tag"].any, vec!["foo-*", "bar-*"]); + } + #[test] fn parse_rejects_unknown_fields() { let yaml = "version: 1\nbogus_field: true\n"; diff --git a/crates/openshell-sandbox/data/sandbox-policy.rego b/crates/openshell-sandbox/data/sandbox-policy.rego index 1544dfe5..0a7a3388 100644 --- a/crates/openshell-sandbox/data/sandbox-policy.rego +++ b/crates/openshell-sandbox/data/sandbox-policy.rego @@ -208,6 +208,7 @@ request_allowed_for_endpoint(request, endpoint) if { rule.allow.method method_matches(request.method, rule.allow.method) path_matches(request.path, rule.allow.path) + query_params_match(request, rule) } # --- L7 rule matching: SQL command --- @@ -235,6 +236,55 @@ path_matches(actual, pattern) if { glob.match(pattern, ["/"], actual) } +# Query matching: +# - If no query rules are configured, allow any query params. +# - For configured keys, all request values for that key must match. +# - Matcher shape supports either `glob` or `any`. +query_params_match(request, rule) if { + query_rules := object.get(rule.allow, "query", {}) + not query_mismatch(request, query_rules) +} + +query_mismatch(request, query_rules) if { + some key + matcher := query_rules[key] + not query_key_matches(request, key, matcher) +} + +query_key_matches(request, key, matcher) if { + request_query := object.get(request, "query_params", {}) + values := object.get(request_query, key, null) + values != null + count(values) > 0 + not query_value_mismatch(values, matcher) +} + +query_value_mismatch(values, matcher) if { + some i + value := values[i] + not query_value_matches(value, matcher) +} + +query_value_matches(value, matcher) if { + is_string(matcher) + glob.match(matcher, [], value) +} + +query_value_matches(value, matcher) if { + is_object(matcher) + glob_pattern := object.get(matcher, "glob", "") + glob_pattern != "" + glob.match(glob_pattern, [], value) +} + +query_value_matches(value, matcher) if { + is_object(matcher) + any_patterns := object.get(matcher, "any", []) + count(any_patterns) > 0 + some i + glob.match(any_patterns[i], [], value) +} + # SQL command matching: "*" matches any; otherwise case-insensitive. command_matches(_, "*") if true diff --git a/crates/openshell-sandbox/src/l7/mod.rs b/crates/openshell-sandbox/src/l7/mod.rs index 09e54788..0d27374a 100644 --- a/crates/openshell-sandbox/src/l7/mod.rs +++ b/crates/openshell-sandbox/src/l7/mod.rs @@ -76,6 +76,8 @@ pub struct L7RequestInfo { pub action: String, /// Target: URL path for REST, or empty for SQL. pub target: String, + /// Decoded query parameter multimap for REST requests. + pub query_params: std::collections::HashMap>, } /// Parse an L7 endpoint config from a regorus Value (returned by Rego query). @@ -279,7 +281,7 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec, Vec< "GET", "HEAD", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "*", ]; if let Some(rules) = ep.get("rules").and_then(|v| v.as_array()) { - for rule in rules { + for (rule_idx, rule) in rules.iter().enumerate() { if let Some(method) = rule .get("allow") .and_then(|a| a.get("method")) @@ -291,6 +293,88 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec, Vec< "{loc}: Unknown HTTP method '{method}'. Standard methods: GET, HEAD, POST, PUT, DELETE, PATCH, OPTIONS." )); } + + let Some(query) = rule + .get("allow") + .and_then(|a| a.get("query")) + .filter(|v| !v.is_null()) + else { + continue; + }; + + let Some(query_obj) = query.as_object() else { + errors.push(format!( + "{loc}.rules[{rule_idx}].allow.query: expected map of query matchers" + )); + continue; + }; + + for (param, matcher) in query_obj { + if matcher.as_str().is_some() { + continue; + } + + let Some(matcher_obj) = matcher.as_object() else { + errors.push(format!( + "{loc}.rules[{rule_idx}].allow.query.{param}: expected string glob or object with `any`" + )); + continue; + }; + + let has_any = matcher_obj.get("any").is_some(); + let has_glob = matcher_obj.get("glob").is_some(); + let has_unknown = matcher_obj.keys().any(|k| k != "any" && k != "glob"); + if has_unknown { + errors.push(format!( + "{loc}.rules[{rule_idx}].allow.query.{param}: unknown matcher keys; only `glob` or `any` are supported" + )); + continue; + } + + if has_glob && has_any { + errors.push(format!( + "{loc}.rules[{rule_idx}].allow.query.{param}: matcher cannot specify both `glob` and `any`" + )); + continue; + } + + if !has_glob && !has_any { + errors.push(format!( + "{loc}.rules[{rule_idx}].allow.query.{param}: object matcher requires `glob` string or non-empty `any` list" + )); + continue; + } + + if has_glob { + if matcher_obj.get("glob").and_then(|v| v.as_str()).is_none() { + errors.push(format!( + "{loc}.rules[{rule_idx}].allow.query.{param}.glob: expected glob string" + )); + } + continue; + } + + let any = matcher_obj.get("any").and_then(|v| v.as_array()); + let Some(any) = any else { + errors.push(format!( + "{loc}.rules[{rule_idx}].allow.query.{param}.any: expected array of glob strings" + )); + continue; + }; + + if any.is_empty() { + errors.push(format!( + "{loc}.rules[{rule_idx}].allow.query.{param}.any: list must not be empty" + )); + continue; + } + + if any.iter().any(|v| v.as_str().is_none()) { + errors.push(format!( + "{loc}.rules[{rule_idx}].allow.query.{param}.any: all values must be strings" + )); + } + } } } } @@ -780,4 +864,96 @@ mod tests { "should have no tls warnings with auto-detect: {warnings:?}" ); } + + #[test] + fn validate_query_any_requires_non_empty_array() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "api.example.com", + "port": 8080, + "protocol": "rest", + "rules": [{ + "allow": { + "method": "GET", + "path": "/download", + "query": { + "tag": { "any": [] } + } + } + }] + }], + "binaries": [] + } + } + }); + let (errors, _warnings) = validate_l7_policies(&data); + assert!( + errors.iter().any(|e| e.contains("allow.query.tag.any")), + "expected query any validation error, got: {errors:?}" + ); + } + + #[test] + fn validate_query_object_rejects_unknown_keys() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "api.example.com", + "port": 8080, + "protocol": "rest", + "rules": [{ + "allow": { + "method": "GET", + "path": "/download", + "query": { + "tag": { "mode": "foo-*" } + } + } + }] + }], + "binaries": [] + } + } + }); + let (errors, _warnings) = validate_l7_policies(&data); + assert!( + errors.iter().any(|e| e.contains("unknown matcher keys")), + "expected unknown query matcher key error, got: {errors:?}" + ); + } + + #[test] + fn validate_query_string_and_any_matchers_are_accepted() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "api.example.com", + "port": 8080, + "protocol": "rest", + "rules": [{ + "allow": { + "method": "GET", + "path": "/download", + "query": { + "slug": "my-*", + "tag": { "any": ["foo-*", "bar-*"] }, + "owner": { "glob": "org-*" } + } + } + }] + }], + "binaries": [] + } + } + }); + let (errors, _warnings) = validate_l7_policies(&data); + assert!( + errors.is_empty(), + "valid query matcher shapes should not error: {errors:?}" + ); + } } diff --git a/crates/openshell-sandbox/src/l7/provider.rs b/crates/openshell-sandbox/src/l7/provider.rs index a9bf8bf5..df0dfb29 100644 --- a/crates/openshell-sandbox/src/l7/provider.rs +++ b/crates/openshell-sandbox/src/l7/provider.rs @@ -10,6 +10,7 @@ //! works for both plaintext TCP and TLS-terminated connections. use miette::Result; +use std::collections::HashMap; use std::future::Future; use tokio::io::{AsyncRead, AsyncWrite}; @@ -31,6 +32,8 @@ pub struct L7Request { pub action: String, /// Target: URL path for REST, empty for SQL. pub target: String, + /// Decoded query parameter multimap for REST requests. + pub query_params: HashMap>, /// Raw request header bytes (request line + headers for HTTP, message for SQL). /// May include overflow body bytes read during header parsing. pub raw_header: Vec, diff --git a/crates/openshell-sandbox/src/l7/relay.rs b/crates/openshell-sandbox/src/l7/relay.rs index 940e7f94..bfd5ac45 100644 --- a/crates/openshell-sandbox/src/l7/relay.rs +++ b/crates/openshell-sandbox/src/l7/relay.rs @@ -108,6 +108,7 @@ where let request_info = L7RequestInfo { action: req.action.clone(), target: req.target.clone(), + query_params: req.query_params.clone(), }; // Evaluate L7 policy via Rego @@ -127,6 +128,7 @@ where l7_protocol = "rest", l7_action = %request_info.action, l7_target = %request_info.target, + l7_query_params = ?request_info.query_params, l7_decision = decision_str, l7_deny_reason = %reason, "L7_REQUEST", @@ -198,6 +200,7 @@ fn evaluate_l7_request( "request": { "method": request.action, "path": request.target, + "query_params": request.query_params.clone(), } }); diff --git a/crates/openshell-sandbox/src/l7/rest.rs b/crates/openshell-sandbox/src/l7/rest.rs index ebb34957..7522b1c4 100644 --- a/crates/openshell-sandbox/src/l7/rest.rs +++ b/crates/openshell-sandbox/src/l7/rest.rs @@ -10,6 +10,7 @@ use crate::l7::provider::{BodyLength, L7Provider, L7Request}; use crate::secrets::rewrite_http_header_block; use miette::{IntoDiagnostic, Result, miette}; +use std::collections::HashMap; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; use tracing::debug; @@ -116,7 +117,7 @@ async fn parse_http_request(client: &mut C) -> Result(client: &mut C) -> Result Result<(String, HashMap>)> { + match target.split_once('?') { + Some((path, query)) => Ok((path.to_string(), parse_query_params(query)?)), + None => Ok((target.to_string(), HashMap::new())), + } +} + +fn parse_query_params(query: &str) -> Result>> { + let mut params: HashMap> = HashMap::new(); + if query.is_empty() { + return Ok(params); + } + + for pair in query.split('&') { + if pair.is_empty() { + continue; + } + + let (raw_key, raw_value) = match pair.split_once('=') { + Some((key, value)) => (key, value), + None => (pair, ""), + }; + let key = decode_query_component(raw_key)?; + let value = decode_query_component(raw_value)?; + params.entry(key).or_default().push(value); + } + + Ok(params) +} + +fn decode_query_component(input: &str) -> Result { + let bytes = input.as_bytes(); + let mut decoded = Vec::with_capacity(bytes.len()); + let mut i = 0; + + while i < bytes.len() { + if bytes[i] != b'%' { + decoded.push(bytes[i]); + i += 1; + continue; + } + + if i + 2 >= bytes.len() { + return Err(miette!("Invalid percent-encoding in query component")); + } + + let hi = decode_hex_nibble(bytes[i + 1]) + .ok_or_else(|| miette!("Invalid percent-encoding in query component"))?; + let lo = decode_hex_nibble(bytes[i + 2]) + .ok_or_else(|| miette!("Invalid percent-encoding in query component"))?; + decoded.push((hi << 4) | lo); + i += 3; + } + + String::from_utf8(decoded).map_err(|_| miette!("Query component is not valid UTF-8")) +} + +fn decode_hex_nibble(byte: u8) -> Option { + match byte { + b'0'..=b'9' => Some(byte - b'0'), + b'a'..=b'f' => Some(byte - b'a' + 10), + b'A'..=b'F' => Some(byte - b'A' + 10), + _ => None, + } +} + /// Forward an allowed HTTP request to upstream and relay the response back. /// /// Returns `true` if the upstream connection is reusable, `false` if consumed. @@ -681,6 +750,39 @@ mod tests { } } + #[test] + fn parse_target_query_parses_duplicate_values() { + let (path, query) = parse_target_query("/download?tag=a&tag=b").expect("parse"); + assert_eq!(path, "/download"); + assert_eq!( + query.get("tag").cloned(), + Some(vec!["a".into(), "b".into()]) + ); + } + + #[test] + fn parse_target_query_decodes_percent_and_preserves_plus() { + let (path, query) = parse_target_query("/download?slug=my%2Fskill&name=Foo+Bar").unwrap(); + assert_eq!(path, "/download"); + assert_eq!( + query.get("slug").cloned(), + Some(vec!["my/skill".to_string()]) + ); + assert_eq!( + query.get("name").cloned(), + Some(vec!["Foo+Bar".to_string()]) + ); + } + + #[test] + fn parse_target_query_rejects_malformed_percent_encoding() { + let err = parse_target_query("/download?slug=bad%2").expect_err("expected parse error"); + assert!( + err.to_string().contains("percent-encoding"), + "unexpected error: {err}" + ); + } + /// SEC-009: Reject requests with both Content-Length and Transfer-Encoding /// to prevent CL/TE request smuggling (RFC 7230 Section 3.3.3). #[test] @@ -746,6 +848,32 @@ mod tests { assert!(result.is_err(), "Must reject unsupported HTTP version"); } + #[tokio::test] + async fn parse_http_request_splits_path_and_query_params() { + let (mut client, mut writer) = tokio::io::duplex(4096); + tokio::spawn(async move { + writer + .write_all( + b"GET /download?slug=my%2Fskill&tag=foo&tag=bar HTTP/1.1\r\nHost: x\r\n\r\n", + ) + .await + .unwrap(); + }); + let req = parse_http_request(&mut client) + .await + .expect("request should parse") + .expect("request should exist"); + assert_eq!(req.target, "/download"); + assert_eq!( + req.query_params.get("slug").cloned(), + Some(vec!["my/skill".to_string()]) + ); + assert_eq!( + req.query_params.get("tag").cloned(), + Some(vec!["foo".to_string(), "bar".to_string()]) + ); + } + /// Regression test: two pipelined requests in a single write must be /// parsed independently. Before the fix, the 1024-byte `read()` buffer /// could capture bytes from the second request, which were forwarded @@ -770,6 +898,7 @@ mod tests { .expect("expected first request"); assert_eq!(first.action, "GET"); assert_eq!(first.target, "/allowed"); + assert!(first.query_params.is_empty()); assert_eq!( first.raw_header, b"GET /allowed HTTP/1.1\r\nHost: example.com\r\n\r\n", "raw_header must contain only the first request's headers" @@ -781,6 +910,7 @@ mod tests { .expect("expected second request"); assert_eq!(second.action, "POST"); assert_eq!(second.target, "/blocked"); + assert!(second.query_params.is_empty()); } #[test] @@ -1133,7 +1263,7 @@ mod tests { /// to the upstream API, causing 401 Unauthorized errors. #[tokio::test] async fn relay_request_with_resolver_rewrites_credential_placeholders() { - let provider_env: std::collections::HashMap = [( + let provider_env: HashMap = [( "NVIDIA_API_KEY".to_string(), "nvapi-real-secret-key".to_string(), )] @@ -1149,6 +1279,7 @@ mod tests { let req = L7Request { action: "POST".to_string(), target: "/v1/chat/completions".to_string(), + query_params: HashMap::new(), raw_header: format!( "POST /v1/chat/completions HTTP/1.1\r\n\ Host: integrate.api.nvidia.com\r\n\ @@ -1232,6 +1363,7 @@ mod tests { let req = L7Request { action: "POST".to_string(), target: "/v1/chat/completions".to_string(), + query_params: HashMap::new(), raw_header: format!( "POST /v1/chat/completions HTTP/1.1\r\n\ Host: integrate.api.nvidia.com\r\n\ diff --git a/crates/openshell-sandbox/src/mechanistic_mapper.rs b/crates/openshell-sandbox/src/mechanistic_mapper.rs index e5ae6497..4fe90d08 100644 --- a/crates/openshell-sandbox/src/mechanistic_mapper.rs +++ b/crates/openshell-sandbox/src/mechanistic_mapper.rs @@ -337,6 +337,7 @@ fn build_l7_rules(samples: &HashMap<(String, String), u32>) -> Vec { method: method.clone(), path: generalised, command: String::new(), + query: HashMap::new(), }), }); } diff --git a/crates/openshell-sandbox/src/opa.rs b/crates/openshell-sandbox/src/opa.rs index cd2931b3..f1df12ff 100644 --- a/crates/openshell-sandbox/src/opa.rs +++ b/crates/openshell-sandbox/src/opa.rs @@ -667,13 +667,35 @@ fn proto_to_opa_data_json(proto: &ProtoSandboxPolicy) -> String { .iter() .map(|r| { let a = r.allow.as_ref(); - serde_json::json!({ - "allow": { - "method": a.map_or("", |a| &a.method), - "path": a.map_or("", |a| &a.path), - "command": a.map_or("", |a| &a.command), - } - }) + let mut allow = serde_json::json!({ + "method": a.map_or("", |a| &a.method), + "path": a.map_or("", |a| &a.path), + "command": a.map_or("", |a| &a.command), + }); + let query: serde_json::Map = a + .map(|allow| { + allow + .query + .iter() + .map(|(key, matcher)| { + let mut matcher_json = serde_json::json!({}); + if !matcher.glob.is_empty() { + matcher_json["glob"] = + matcher.glob.clone().into(); + } + if !matcher.any.is_empty() { + matcher_json["any"] = + matcher.any.clone().into(); + } + (key.clone(), matcher_json) + }) + .collect() + }) + .unwrap_or_default(); + if !query.is_empty() { + allow["query"] = query.into(); + } + serde_json::json!({ "allow": allow }) }) .collect(); ep["rules"] = rules.into(); @@ -714,8 +736,9 @@ mod tests { use super::*; use openshell_core::proto::{ - FilesystemPolicy as ProtoFs, NetworkBinary, NetworkEndpoint, NetworkPolicyRule, - ProcessPolicy as ProtoProc, SandboxPolicy as ProtoSandboxPolicy, + FilesystemPolicy as ProtoFs, L7Allow, L7QueryMatcher, L7Rule, NetworkBinary, + NetworkEndpoint, NetworkPolicyRule, ProcessPolicy as ProtoProc, + SandboxPolicy as ProtoSandboxPolicy, }; const TEST_POLICY: &str = include_str!("../data/sandbox-policy.rego"); @@ -1337,6 +1360,27 @@ network_policies: access: full binaries: - { path: /usr/bin/curl } + query_api: + name: query_api + endpoints: + - host: api.query.com + port: 8080 + protocol: rest + enforcement: enforce + rules: + - allow: + method: GET + path: "/download" + query: + tag: "foo-*" + - allow: + method: GET + path: "/search" + query: + tag: + any: ["foo-*", "bar-*"] + binaries: + - { path: /usr/bin/curl } l4_only: name: l4_only endpoints: @@ -1359,6 +1403,16 @@ process: } fn l7_input(host: &str, port: u16, method: &str, path: &str) -> serde_json::Value { + l7_input_with_query(host, port, method, path, serde_json::json!({})) + } + + fn l7_input_with_query( + host: &str, + port: u16, + method: &str, + path: &str, + query_params: serde_json::Value, + ) -> serde_json::Value { serde_json::json!({ "network": { "host": host, "port": port }, "exec": { @@ -1368,7 +1422,8 @@ process: }, "request": { "method": method, - "path": path + "path": path, + "query_params": query_params } }) } @@ -1472,6 +1527,140 @@ process: assert!(eval_l7(&engine, &input)); } + #[test] + fn l7_query_glob_allows_matching_duplicate_values() { + let engine = l7_engine(); + let input = l7_input_with_query( + "api.query.com", + 8080, + "GET", + "/download", + serde_json::json!({ + "tag": ["foo-a", "foo-b"], + "extra": ["ignored"], + }), + ); + assert!(eval_l7(&engine, &input)); + } + + #[test] + fn l7_query_glob_denies_on_mismatched_duplicate_value() { + let engine = l7_engine(); + let input = l7_input_with_query( + "api.query.com", + 8080, + "GET", + "/download", + serde_json::json!({ + "tag": ["foo-a", "evil"], + }), + ); + assert!(!eval_l7(&engine, &input)); + } + + #[test] + fn l7_query_any_allows_if_every_value_matches_any_pattern() { + let engine = l7_engine(); + let input = l7_input_with_query( + "api.query.com", + 8080, + "GET", + "/search", + serde_json::json!({ + "tag": ["foo-a", "bar-b"], + }), + ); + assert!(eval_l7(&engine, &input)); + } + + #[test] + fn l7_query_missing_required_key_denied() { + let engine = l7_engine(); + let input = l7_input_with_query( + "api.query.com", + 8080, + "GET", + "/download", + serde_json::json!({}), + ); + assert!(!eval_l7(&engine, &input)); + } + + #[test] + fn l7_query_rules_from_proto_are_enforced() { + let mut query = std::collections::HashMap::new(); + query.insert( + "tag".to_string(), + L7QueryMatcher { + glob: "foo-*".to_string(), + any: vec![], + }, + ); + + let mut network_policies = std::collections::HashMap::new(); + network_policies.insert( + "query_proto".to_string(), + NetworkPolicyRule { + name: "query_proto".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.proto.com".to_string(), + port: 8080, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + rules: vec![L7Rule { + allow: Some(L7Allow { + method: "GET".to_string(), + path: "/download".to_string(), + command: String::new(), + query, + }), + }], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }, + ); + + let proto = ProtoSandboxPolicy { + version: 1, + filesystem: Some(ProtoFs { + include_workdir: true, + read_only: vec![], + read_write: vec![], + }), + landlock: Some(openshell_core::proto::LandlockPolicy { + compatibility: "best_effort".to_string(), + }), + process: Some(ProtoProc { + run_as_user: "sandbox".to_string(), + run_as_group: "sandbox".to_string(), + }), + network_policies, + }; + + let engine = OpaEngine::from_proto(&proto).expect("engine from proto"); + let allow_input = l7_input_with_query( + "api.proto.com", + 8080, + "GET", + "/download", + serde_json::json!({ "tag": ["foo-a"] }), + ); + assert!(eval_l7(&engine, &allow_input)); + + let deny_input = l7_input_with_query( + "api.proto.com", + 8080, + "GET", + "/download", + serde_json::json!({ "tag": ["evil"] }), + ); + assert!(!eval_l7(&engine, &deny_input)); + } + #[test] fn l7_no_request_on_l4_only_endpoint() { // L4-only endpoint should not match L7 allow_request diff --git a/docs/reference/policy-schema.md b/docs/reference/policy-schema.md index cb37d0ba..0db712b4 100644 --- a/docs/reference/policy-schema.md +++ b/docs/reference/policy-schema.md @@ -183,6 +183,7 @@ Used when `access` is not set. Each rule explicitly allows a method and path com |---|---|---|---| | `allow.method` | string | Yes | HTTP method to allow (for example, `GET`, `POST`). | | `allow.path` | string | Yes | URL path pattern. Supports `*` and `**` glob syntax. | +| `allow.query` | map | No | Query parameter matchers keyed by decoded param name. Matcher value can be a glob string (`tag: "foo-*"`) or an object with `any` (`tag: { any: ["foo-*", "bar-*"] }`). | Example with rules: @@ -191,9 +192,14 @@ rules: - allow: method: GET path: /**/info/refs* + query: + service: "git-*" - allow: method: POST path: /**/git-upload-pack + query: + tag: + any: ["v1.*", "v2.*"] ``` ### Binary Object diff --git a/docs/sandboxes/policies.md b/docs/sandboxes/policies.md index 565a7a4c..2c64c9c1 100644 --- a/docs/sandboxes/policies.md +++ b/docs/sandboxes/policies.md @@ -256,6 +256,32 @@ Endpoints with `protocol: rest` enable HTTP request inspection. The proxy auto-d ::::: +### Query parameter matching + +REST rules can also constrain query parameter values: + +```yaml + download_api: + name: download_api + endpoints: + - host: api.example.com + port: 443 + protocol: rest + enforcement: enforce + rules: + - allow: + method: GET + path: "/api/v1/download" + query: + slug: "skill-*" + version: + any: ["1.*", "2.*"] + binaries: + - { path: /usr/bin/curl } +``` + +`query` matchers are case-sensitive and run on decoded values. If a request has duplicate keys (for example, `tag=a&tag=b`), every value for that key must match the configured glob(s). + ## Next Steps Explore related topics: diff --git a/e2e/python/test_sandbox_policy.py b/e2e/python/test_sandbox_policy.py index 1615bef6..6a4bf5ed 100644 --- a/e2e/python/test_sandbox_policy.py +++ b/e2e/python/test_sandbox_policy.py @@ -271,6 +271,90 @@ def fn(proxy_host, proxy_port, target_url): return fn +def _proxy_connect_then_http_with_server(): + """Return a closure that starts a local HTTP server and sends CONNECT+HTTP.""" + + def fn(proxy_host, proxy_port, target_host, target_port, method="GET", path="/"): + import json as _json + import socket + import threading + import time + from http.server import BaseHTTPRequestHandler, HTTPServer + + class Handler(BaseHTTPRequestHandler): + def do_GET(self): + self.send_response(200) + body = b"connect-server-ok" + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def do_POST(self): + self.send_response(200) + body = b"connect-server-ok" + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def log_message(self, *args): + pass + + srv = HTTPServer(("0.0.0.0", int(target_port)), Handler) + threading.Thread(target=srv.handle_request, daemon=True).start() + time.sleep(0.5) + + conn = socket.create_connection((proxy_host, int(proxy_port)), timeout=10) + try: + conn.sendall( + f"CONNECT {target_host}:{target_port} HTTP/1.1\r\nHost: {target_host}\r\n\r\n".encode() + ) + connect_resp = conn.recv(256).decode("latin1") + if "200" not in connect_resp: + return _json.dumps( + {"connect_status": connect_resp.strip(), "http_status": 0} + ) + + request = ( + f"{method} {path} HTTP/1.1\r\nHost: {target_host}\r\nConnection: close\r\n\r\n" + ) + conn.sendall(request.encode()) + + data = b"" + conn.settimeout(5) + try: + while True: + chunk = conn.recv(4096) + if not chunk: + break + data += chunk + except socket.timeout: + pass + + response = data.decode("latin1", errors="replace") + status_line = response.split("\r\n")[0] if response else "" + status_code = ( + int(status_line.split()[1]) if len(status_line.split()) >= 2 else 0 + ) + + header_end = response.find("\r\n\r\n") + headers_raw = response[:header_end] if header_end > 0 else "" + body = response[header_end + 4 :] if header_end > 0 else "" + + return _json.dumps( + { + "connect_status": connect_resp.strip(), + "http_status": status_code, + "headers": headers_raw, + "body": body, + } + ) + finally: + conn.close() + srv.server_close() + + return fn + + def test_policy_applies_to_exec_commands( sandbox: Callable[..., Sandbox], ) -> None: @@ -796,6 +880,8 @@ def test_ssrf_loopback_blocked_even_with_allowed_ips( # L7-T6: L7 deny response is valid JSON with expected fields # L7-T7: L7 request logging includes structured fields # L7-T8: Port 443 + protocol=rest without tls=terminate warns (L7 not evaluated) +# L7-T9: Query matcher glob/any allows and denies as expected +# L7-T10: Rule without query matcher allows any query params # ============================================================================= @@ -1102,6 +1188,166 @@ def test_l7_tls_log_fields( assert "l7_decision" in log +def test_l7_query_matchers_enforced( + sandbox: Callable[..., Sandbox], +) -> None: + """L7-T9: Query matcher glob/any allows and denies as expected.""" + policy = _base_policy( + network_policies={ + "query_api": sandbox_pb2.NetworkPolicyRule( + name="query_api", + endpoints=[ + sandbox_pb2.NetworkEndpoint( + host=_SANDBOX_IP, + port=_FORWARD_PROXY_PORT, + protocol="rest", + enforcement="enforce", + allowed_ips=["10.200.0.0/24"], + rules=[ + sandbox_pb2.L7Rule( + allow=sandbox_pb2.L7Allow( + method="GET", + path="/download", + query={ + "tag": sandbox_pb2.L7QueryMatcher(glob="foo-*"), + }, + ), + ), + sandbox_pb2.L7Rule( + allow=sandbox_pb2.L7Allow( + method="GET", + path="/search", + query={ + "tag": sandbox_pb2.L7QueryMatcher( + any=["foo-*", "bar-*"] + ), + }, + ), + ), + ], + ), + ], + binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + with sandbox(spec=spec, delete_on_exit=True) as sb: + allowed = sb.exec_python( + _proxy_connect_then_http_with_server(), + args=( + _PROXY_HOST, + _PROXY_PORT, + _SANDBOX_IP, + _FORWARD_PROXY_PORT, + "GET", + "/download?tag=foo-a&tag=foo-b", + ), + ) + assert allowed.exit_code == 0, allowed.stderr + allowed_resp = json.loads(allowed.stdout) + assert "200" in allowed_resp["connect_status"] + assert allowed_resp["http_status"] == 200 + assert "connect-server-ok" in allowed_resp["body"] + + denied = sb.exec_python( + _proxy_connect_then_http_with_server(), + args=( + _PROXY_HOST, + _PROXY_PORT, + _SANDBOX_IP, + _FORWARD_PROXY_PORT, + "GET", + "/download?tag=foo-a&tag=evil", + ), + ) + assert denied.exit_code == 0, denied.stderr + denied_resp = json.loads(denied.stdout) + assert denied_resp["http_status"] == 403 + assert "policy_denied" in denied_resp["body"] + + any_allowed = sb.exec_python( + _proxy_connect_then_http_with_server(), + args=( + _PROXY_HOST, + _PROXY_PORT, + _SANDBOX_IP, + _FORWARD_PROXY_PORT, + "GET", + "/search?tag=foo-a&tag=bar-b", + ), + ) + assert any_allowed.exit_code == 0, any_allowed.stderr + any_resp = json.loads(any_allowed.stdout) + assert any_resp["http_status"] == 200 + assert "connect-server-ok" in any_resp["body"] + + missing_required = sb.exec_python( + _proxy_connect_then_http_with_server(), + args=( + _PROXY_HOST, + _PROXY_PORT, + _SANDBOX_IP, + _FORWARD_PROXY_PORT, + "GET", + "/download?slug=skill-1", + ), + ) + assert missing_required.exit_code == 0, missing_required.stderr + missing_resp = json.loads(missing_required.stdout) + assert missing_resp["http_status"] == 403 + assert "policy_denied" in missing_resp["body"] + + +def test_l7_rule_without_query_matcher_allows_any_query_params( + sandbox: Callable[..., Sandbox], +) -> None: + """L7-T10: Rule without query matcher allows any query params.""" + policy = _base_policy( + network_policies={ + "query_optional": sandbox_pb2.NetworkPolicyRule( + name="query_optional", + endpoints=[ + sandbox_pb2.NetworkEndpoint( + host=_SANDBOX_IP, + port=_FORWARD_PROXY_PORT, + protocol="rest", + enforcement="enforce", + allowed_ips=["10.200.0.0/24"], + rules=[ + sandbox_pb2.L7Rule( + allow=sandbox_pb2.L7Allow( + method="GET", + path="/download", + ), + ), + ], + ), + ], + binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + with sandbox(spec=spec, delete_on_exit=True) as sb: + result = sb.exec_python( + _proxy_connect_then_http_with_server(), + args=( + _PROXY_HOST, + _PROXY_PORT, + _SANDBOX_IP, + _FORWARD_PROXY_PORT, + "GET", + "/download?tag=anything&slug=any-value", + ), + ) + assert result.exit_code == 0, result.stderr + resp = json.loads(result.stdout) + assert "200" in resp["connect_status"] + assert resp["http_status"] == 200 + assert "connect-server-ok" in resp["body"] + + # ============================================================================= # Live policy update + log streaming tests # diff --git a/proto/sandbox.proto b/proto/sandbox.proto index a96ca33f..61948a52 100644 --- a/proto/sandbox.proto +++ b/proto/sandbox.proto @@ -100,6 +100,18 @@ message L7Allow { string path = 2; // SQL command (SQL): SELECT, INSERT, etc. or "*" for any. string command = 3; + // Query parameter matcher map (REST). + // Key is the decoded query parameter name (case-sensitive). + // Value supports either a single glob (`glob`) or a list (`any`). + map query = 4; +} + +// Query value matcher for one query parameter key. +message L7QueryMatcher { + // Single glob pattern. + string glob = 1; + // Any-of glob patterns. + repeated string any = 2; } // A binary identity for network policy matching. From b2a197b137920b4ebab1e39fbb2a0fd6617b1d13 Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Wed, 25 Mar 2026 17:26:56 -0700 Subject: [PATCH 2/2] fix(sandbox): decode + as space in query params and validate glob syntax Three improvements from PR #617 review: 1. Decode + as space in query string values per the application/x-www-form-urlencoded convention. This matches Python's urllib.parse, JavaScript's URLSearchParams, Go's url.ParseQuery, and most HTTP frameworks. Literal + should be sent as %2B. 2. Add glob pattern syntax validation (warnings) for query matchers. Checks for unclosed brackets and braces in glob/any patterns. These are warnings (not errors) because OPA's glob.match is forgiving, but they surface likely typos during policy loading. 3. Add missing test cases: empty query values, keys without values, unicode after percent-decoding, empty query strings, and literal + via %2B encoding. --- crates/openshell-sandbox/src/l7/mod.rs | 183 +++++++++++++++++++++++- crates/openshell-sandbox/src/l7/rest.rs | 70 ++++++++- 2 files changed, 246 insertions(+), 7 deletions(-) diff --git a/crates/openshell-sandbox/src/l7/mod.rs b/crates/openshell-sandbox/src/l7/mod.rs index 0d27374a..880b6fd9 100644 --- a/crates/openshell-sandbox/src/l7/mod.rs +++ b/crates/openshell-sandbox/src/l7/mod.rs @@ -146,6 +146,49 @@ fn get_object_str(val: ®orus::Value, key: &str) -> Option { } } +/// Check a glob pattern for obvious syntax issues. +/// +/// Returns `Some(warning_message)` if the pattern looks malformed. +/// OPA's `glob.match` is forgiving, so these are warnings (not errors) +/// to surface likely typos without blocking policy loading. +fn check_glob_syntax(pattern: &str) -> Option { + let mut bracket_depth: i32 = 0; + for c in pattern.chars() { + match c { + '[' => bracket_depth += 1, + ']' => { + if bracket_depth == 0 { + return Some(format!("glob pattern '{pattern}' has unmatched ']'")); + } + bracket_depth -= 1; + } + _ => {} + } + } + if bracket_depth > 0 { + return Some(format!("glob pattern '{pattern}' has unclosed '['")); + } + + let mut brace_depth: i32 = 0; + for c in pattern.chars() { + match c { + '{' => brace_depth += 1, + '}' => { + if brace_depth == 0 { + return Some(format!("glob pattern '{pattern}' has unmatched '}}'")); + } + brace_depth -= 1; + } + _ => {} + } + } + if brace_depth > 0 { + return Some(format!("glob pattern '{pattern}' has unclosed '{{'")); + } + + None +} + /// Validate L7 policy configuration in the loaded OPA data. /// /// Returns a list of errors and warnings. Errors should prevent sandbox startup; @@ -310,7 +353,12 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec, Vec< }; for (param, matcher) in query_obj { - if matcher.as_str().is_some() { + if let Some(glob_str) = matcher.as_str() { + if let Some(warning) = check_glob_syntax(glob_str) { + warnings.push(format!( + "{loc}.rules[{rule_idx}].allow.query.{param}: {warning}" + )); + } continue; } @@ -346,10 +394,19 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec, Vec< } if has_glob { - if matcher_obj.get("glob").and_then(|v| v.as_str()).is_none() { - errors.push(format!( - "{loc}.rules[{rule_idx}].allow.query.{param}.glob: expected glob string" - )); + match matcher_obj.get("glob").and_then(|v| v.as_str()) { + None => { + errors.push(format!( + "{loc}.rules[{rule_idx}].allow.query.{param}.glob: expected glob string" + )); + } + Some(g) => { + if let Some(warning) = check_glob_syntax(g) { + warnings.push(format!( + "{loc}.rules[{rule_idx}].allow.query.{param}.glob: {warning}" + )); + } + } } continue; } @@ -374,6 +431,14 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec, Vec< "{loc}.rules[{rule_idx}].allow.query.{param}.any: all values must be strings" )); } + + for item in any.iter().filter_map(|v| v.as_str()) { + if let Some(warning) = check_glob_syntax(item) { + warnings.push(format!( + "{loc}.rules[{rule_idx}].allow.query.{param}.any: {warning}" + )); + } + } } } } @@ -925,6 +990,114 @@ mod tests { ); } + #[test] + fn validate_query_glob_warns_on_unclosed_bracket() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "api.example.com", + "port": 8080, + "protocol": "rest", + "rules": [{ + "allow": { + "method": "GET", + "path": "/download", + "query": { + "tag": "[unclosed" + } + } + }] + }], + "binaries": [] + } + } + }); + let (errors, warnings) = validate_l7_policies(&data); + assert!( + errors.is_empty(), + "malformed glob should warn, not error: {errors:?}" + ); + assert!( + warnings + .iter() + .any(|w| w.contains("unclosed '['") && w.contains("allow.query.tag")), + "expected glob syntax warning, got: {warnings:?}" + ); + } + + #[test] + fn validate_query_glob_warns_on_unclosed_brace() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "api.example.com", + "port": 8080, + "protocol": "rest", + "rules": [{ + "allow": { + "method": "GET", + "path": "/download", + "query": { + "format": { "glob": "{json,xml" } + } + } + }] + }], + "binaries": [] + } + } + }); + let (errors, warnings) = validate_l7_policies(&data); + assert!( + errors.is_empty(), + "malformed glob should warn, not error: {errors:?}" + ); + assert!( + warnings + .iter() + .any(|w| w.contains("unclosed '{'") && w.contains("allow.query.format.glob")), + "expected glob syntax warning, got: {warnings:?}" + ); + } + + #[test] + fn validate_query_any_warns_on_malformed_glob_item() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "api.example.com", + "port": 8080, + "protocol": "rest", + "rules": [{ + "allow": { + "method": "GET", + "path": "/download", + "query": { + "tag": { "any": ["valid-*", "[bad"] } + } + } + }] + }], + "binaries": [] + } + } + }); + let (errors, warnings) = validate_l7_policies(&data); + assert!( + errors.is_empty(), + "malformed glob in any should warn, not error: {errors:?}" + ); + assert!( + warnings + .iter() + .any(|w| w.contains("unclosed '['") && w.contains("allow.query.tag.any")), + "expected glob syntax warning for any item, got: {warnings:?}" + ); + } + #[test] fn validate_query_string_and_any_matchers_are_accepted() { let data = serde_json::json!({ diff --git a/crates/openshell-sandbox/src/l7/rest.rs b/crates/openshell-sandbox/src/l7/rest.rs index 7522b1c4..389fbba0 100644 --- a/crates/openshell-sandbox/src/l7/rest.rs +++ b/crates/openshell-sandbox/src/l7/rest.rs @@ -171,12 +171,25 @@ fn parse_query_params(query: &str) -> Result>> { Ok(params) } +/// Decode a single query string component (key or value). +/// +/// Handles both RFC 3986 percent-encoding (`%20` → space) and the +/// `application/x-www-form-urlencoded` convention (`+` → space). +/// Decoding `+` as space matches the behavior of Python's `urllib.parse`, +/// JavaScript's `URLSearchParams`, Go's `url.ParseQuery`, and most HTTP +/// frameworks. Callers that need a literal `+` should send `%2B`. fn decode_query_component(input: &str) -> Result { let bytes = input.as_bytes(); let mut decoded = Vec::with_capacity(bytes.len()); let mut i = 0; while i < bytes.len() { + if bytes[i] == b'+' { + decoded.push(b' '); + i += 1; + continue; + } + if bytes[i] != b'%' { decoded.push(bytes[i]); i += 1; @@ -761,16 +774,69 @@ mod tests { } #[test] - fn parse_target_query_decodes_percent_and_preserves_plus() { + fn parse_target_query_decodes_percent_and_plus() { let (path, query) = parse_target_query("/download?slug=my%2Fskill&name=Foo+Bar").unwrap(); assert_eq!(path, "/download"); assert_eq!( query.get("slug").cloned(), Some(vec!["my/skill".to_string()]) ); + // `+` is decoded as space per application/x-www-form-urlencoded. + // Literal `+` should be sent as `%2B`. assert_eq!( query.get("name").cloned(), - Some(vec!["Foo+Bar".to_string()]) + Some(vec!["Foo Bar".to_string()]) + ); + } + + #[test] + fn parse_target_query_literal_plus_via_percent_encoding() { + let (_path, query) = parse_target_query("/search?q=a%2Bb").unwrap(); + assert_eq!( + query.get("q").cloned(), + Some(vec!["a+b".to_string()]), + "%2B should decode to literal +" + ); + } + + #[test] + fn parse_target_query_empty_value() { + let (_path, query) = parse_target_query("/api?tag=").unwrap(); + assert_eq!( + query.get("tag").cloned(), + Some(vec!["".to_string()]), + "key with empty value should produce empty string" + ); + } + + #[test] + fn parse_target_query_key_without_value() { + let (_path, query) = parse_target_query("/api?verbose").unwrap(); + assert_eq!( + query.get("verbose").cloned(), + Some(vec!["".to_string()]), + "key without = should produce empty string value" + ); + } + + #[test] + fn parse_target_query_unicode_after_decoding() { + // "café" = c a f %C3%A9 + let (_path, query) = parse_target_query("/search?q=caf%C3%A9").unwrap(); + assert_eq!( + query.get("q").cloned(), + Some(vec!["café".to_string()]), + "percent-encoded UTF-8 should decode correctly" + ); + } + + #[test] + fn parse_target_query_empty_query_string() { + let (path, query) = parse_target_query("/api?").unwrap(); + assert_eq!(path, "/api"); + assert!( + query.is_empty(), + "empty query after ? should produce empty map" ); }