From a354e57659d26149fde0d91b76f83fce94e8f277 Mon Sep 17 00:00:00 2001 From: Mathijs van Veluw Date: Wed, 29 Apr 2026 22:20:59 +0200 Subject: [PATCH] Fix Host/IP resolving (#7162) IPv4 addresses can also be in decimal or hex formats. These were not checked during the Global IP check, and could bypass it. We now convert everything to the right format before running this check and it will catch these formats. Also updated the `is_global()` function to match Rust's still unstable version. And updated the Image Magic checks to be more precise and filter out any possible broken or invalid formats. While at it, also added several checks to ensure these special formatted IPv4 addresses are still blocked and punycode domains are also correctly resolved. Signed-off-by: BlackDex --- .pre-commit-config.yaml | 2 +- .typos.toml | 2 + src/api/icons.rs | 107 ++++++--------- src/http_client.rs | 282 +++++++++++++++++++++++++++++++++++++--- src/util.rs | 24 +++- 5 files changed, 323 insertions(+), 94 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b16ae4c6..f10cef65 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # When this version is updated, do not forget to update this in `.github/workflows/typos.yaml` too - repo: https://github.com/crate-ci/typos - rev: cf5f1c29a8ac336af8568821ec41919923b05a83 # v1.45.1 + rev: 7c572958218557a3272c2d6719629443b5cc26fd # v1.45.2 hooks: - id: typos diff --git a/.typos.toml b/.typos.toml index 59f6d7d6..87c0c4a6 100644 --- a/.typos.toml +++ b/.typos.toml @@ -23,4 +23,6 @@ extend-ignore-re = [ # https://github.com/bitwarden/server/blob/dff9f1cf538198819911cf2c20f8cda3307701c5/src/Notifications/HubHelpers.cs#L86 # https://github.com/bitwarden/clients/blob/9612a4ac45063e372a6fbe87eb253c7cb3c588fb/libs/common/src/auth/services/anonymous-hub.service.ts#L45 "AuthRequestResponseRecieved", + # Ignore Punycode/IDN tests + "xn--.+" ] diff --git a/src/api/icons.rs b/src/api/icons.rs index da83d0c4..b3a66f4d 100644 --- a/src/api/icons.rs +++ b/src/api/icons.rs @@ -19,7 +19,7 @@ use svg_hush::{data_url_filter, Filter}; use crate::{ config::PathType, error::Error, - http_client::{get_reqwest_client_builder, should_block_address, CustomHttpClientError}, + http_client::{get_reqwest_client_builder, get_valid_host, should_block_host, CustomHttpClientError}, util::Cached, CONFIG, }; @@ -81,19 +81,19 @@ static ICON_SIZE_REGEX: LazyLock = LazyLock::new(|| Regex::new(r"(?x)(\d+ // The function name `icon_external` is checked in the `on_response` function in `AppHeaders` // It is used to prevent sending a specific header which breaks icon downloads. // If this function needs to be renamed, also adjust the code in `util.rs` -#[get("//icon.png")] -fn icon_external(domain: &str) -> Cached> { - if !is_valid_domain(domain) { - warn!("Invalid domain: {domain}"); +#[get("//icon.png")] +fn icon_external(host: &str) -> Cached> { + let Ok(host) = get_valid_host(host) else { + warn!("Invalid host: {host}"); + return Cached::ttl(None, CONFIG.icon_cache_negttl(), true); + }; + + if should_block_host(&host).is_err() { + warn!("Blocked address: {host}"); return Cached::ttl(None, CONFIG.icon_cache_negttl(), true); } - if should_block_address(domain) { - warn!("Blocked address: {domain}"); - return Cached::ttl(None, CONFIG.icon_cache_negttl(), true); - } - - let url = CONFIG._icon_service_url().replace("{}", domain); + let url = CONFIG._icon_service_url().replace("{}", &host.to_string()); let redir = match CONFIG.icon_redirect_code() { 301 => Some(Redirect::moved(url)), // legacy permanent redirect 302 => Some(Redirect::found(url)), // legacy temporary redirect @@ -107,12 +107,21 @@ fn icon_external(domain: &str) -> Cached> { Cached::ttl(redir, CONFIG.icon_cache_ttl(), true) } -#[get("//icon.png")] -async fn icon_internal(domain: &str) -> Cached<(ContentType, Vec)> { +#[get("//icon.png")] +async fn icon_internal(host: &str) -> Cached<(ContentType, Vec)> { const FALLBACK_ICON: &[u8] = include_bytes!("../static/images/fallback-icon.png"); - if !is_valid_domain(domain) { - warn!("Invalid domain: {domain}"); + let Ok(host) = get_valid_host(host) else { + warn!("Invalid host: {host}"); + return Cached::ttl( + (ContentType::new("image", "png"), FALLBACK_ICON.to_vec()), + CONFIG.icon_cache_negttl(), + true, + ); + }; + + if should_block_host(&host).is_err() { + warn!("Blocked address: {host}"); return Cached::ttl( (ContentType::new("image", "png"), FALLBACK_ICON.to_vec()), CONFIG.icon_cache_negttl(), @@ -120,16 +129,7 @@ async fn icon_internal(domain: &str) -> Cached<(ContentType, Vec)> { ); } - if should_block_address(domain) { - warn!("Blocked address: {domain}"); - return Cached::ttl( - (ContentType::new("image", "png"), FALLBACK_ICON.to_vec()), - CONFIG.icon_cache_negttl(), - true, - ); - } - - match get_icon(domain).await { + match get_icon(&host.to_string()).await { Some((icon, icon_type)) => { Cached::ttl((ContentType::new("image", icon_type), icon), CONFIG.icon_cache_ttl(), true) } @@ -137,42 +137,6 @@ async fn icon_internal(domain: &str) -> Cached<(ContentType, Vec)> { } } -/// Returns if the domain provided is valid or not. -/// -/// This does some manual checks and makes use of Url to do some basic checking. -/// domains can't be larger then 63 characters (not counting multiple subdomains) according to the RFC's, but we limit the total size to 255. -fn is_valid_domain(domain: &str) -> bool { - const ALLOWED_CHARS: &str = "-."; - - // If parsing the domain fails using Url, it will not work with reqwest. - if let Err(parse_error) = url::Url::parse(format!("https://{domain}").as_str()) { - debug!("Domain parse error: '{domain}' - {parse_error:?}"); - return false; - } else if domain.is_empty() - || domain.contains("..") - || domain.starts_with('.') - || domain.starts_with('-') - || domain.ends_with('-') - { - debug!( - "Domain validation error: '{domain}' is either empty, contains '..', starts with an '.', starts or ends with a '-'" - ); - return false; - } else if domain.len() > 255 { - debug!("Domain validation error: '{domain}' exceeds 255 characters"); - return false; - } - - for c in domain.chars() { - if !c.is_alphanumeric() && !ALLOWED_CHARS.contains(c) { - debug!("Domain validation error: '{domain}' contains an invalid character '{c}'"); - return false; - } - } - - true -} - async fn get_icon(domain: &str) -> Option<(Vec, String)> { let path = format!("{domain}.png"); @@ -367,7 +331,7 @@ async fn get_icon_url(domain: &str) -> Result { tld = domain_parts.next_back().unwrap(), base = domain_parts.next_back().unwrap() ); - if is_valid_domain(&base_domain) { + if get_valid_host(&base_domain).is_ok() { let sslbase = format!("https://{base_domain}"); let httpbase = format!("http://{base_domain}"); debug!("[get_icon_url]: Trying without subdomains '{base_domain}'"); @@ -378,7 +342,7 @@ async fn get_icon_url(domain: &str) -> Result { // When the domain is not an IP, and has less then 2 dots, try to add www. infront of it. } else if is_ip.is_err() && domain.matches('.').count() < 2 { let www_domain = format!("www.{domain}"); - if is_valid_domain(&www_domain) { + if get_valid_host(&www_domain).is_ok() { let sslwww = format!("https://{www_domain}"); let httpwww = format!("http://{www_domain}"); debug!("[get_icon_url]: Trying with www. prefix '{www_domain}'"); @@ -618,14 +582,17 @@ fn get_icon_type(bytes: &[u8]) -> Option<&'static str> { None } + // Some details can be found here: + // - https://www.garykessler.net/library/file_sigs_GCK_latest.html + // - https://en.wikipedia.org/wiki/List_of_file_signatures match bytes { - [137, 80, 78, 71, ..] => Some("png"), - [0, 0, 1, 0, ..] => Some("x-icon"), - [82, 73, 70, 70, ..] => Some("webp"), - [255, 216, 255, ..] => Some("jpeg"), - [71, 73, 70, 56, ..] => Some("gif"), - [66, 77, ..] => Some("bmp"), - [60, 115, 118, 103, ..] => Some("svg+xml"), // Normal svg + [137, 80, 78, 71, 13, 10, 26, 10, ..] => Some("png"), + [0, 0, 1, 0, n1, n2, ..] if u16::from_le_bytes([*n1, *n2]) > 0 => Some("x-icon"), // https://en.wikipedia.org/wiki/ICO_(file_format) + [82, 73, 70, 70, _, _, _, _, 87, 69, 66, 80, ..] => Some("webp"), // Only match WebP Images + [255, 216, 255, b, ..] if *b >= 0xC0 => Some("jpeg"), + [71, 73, 70, 56, 55 | 57, 97, ..] => Some("gif"), + [66, 77, _, _, _, _, 0, 0, 0, 0, ..] => Some("bmp"), // https://en.wikipedia.org/wiki/BMP_file_format + [60, 115, 118, 103, ..] => Some("svg+xml"), // Normal svg [60, 63, 120, 109, 108, ..] => check_svg_after_xml_declaration(bytes), // An svg starting with None, } diff --git a/src/http_client.rs b/src/http_client.rs index df52e2bc..d39b884d 100644 --- a/src/http_client.rs +++ b/src/http_client.rs @@ -1,7 +1,6 @@ use std::{ fmt, net::{IpAddr, SocketAddr}, - str::FromStr, sync::{Arc, LazyLock, Mutex}, time::Duration, }; @@ -59,16 +58,6 @@ pub fn get_reqwest_client_builder() -> ClientBuilder { .timeout(Duration::from_secs(10)) } -pub fn should_block_address(domain_or_ip: &str) -> bool { - if let Ok(ip) = IpAddr::from_str(domain_or_ip) { - if should_block_ip(ip) { - return true; - } - } - - should_block_address_regex(domain_or_ip) -} - fn should_block_ip(ip: IpAddr) -> bool { if !CONFIG.http_request_block_non_global_ips() { return false; @@ -100,11 +89,54 @@ fn should_block_address_regex(domain_or_ip: &str) -> bool { is_match } -fn should_block_host(host: &Host<&str>) -> Result<(), CustomHttpClientError> { +pub fn get_valid_host(host: &str) -> Result { + let Ok(host) = Host::parse(host) else { + return Err(CustomHttpClientError::Invalid { + domain: host.to_string(), + }); + }; + + // Some extra checks to validate hosts + match host { + Host::Domain(ref domain) => { + // Host::parse() does not verify length or all possible invalid characters + // We do some extra checks here to prevent issues + if domain.len() > 253 { + debug!("Domain validation error: '{domain}' exceeds 253 characters"); + return Err(CustomHttpClientError::Invalid { + domain: host.to_string(), + }); + } + if !domain.split('.').all(|label| { + !label.is_empty() + // Labels can't be longer than 63 chars + && label.len() <= 63 + // Labels are not allowed to start or end with a hyphen `-` + && !label.starts_with('-') + && !label.ends_with('-') + // Only ASCII Alphanumeric characters are allowed + // We already received a punycoded domain back, so no unicode should exists here + && label.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') + }) { + debug!( + "Domain validation error: '{domain}' labels contain invalid characters or exceed the maximum length" + ); + return Err(CustomHttpClientError::Invalid { + domain: host.to_string(), + }); + } + } + Host::Ipv4(_) | Host::Ipv6(_) => {} + } + + Ok(host) +} + +pub fn should_block_host>(host: &Host) -> Result<(), CustomHttpClientError> { let (ip, host_str): (Option, String) = match host { Host::Ipv4(ip) => (Some(IpAddr::V4(*ip)), ip.to_string()), Host::Ipv6(ip) => (Some(IpAddr::V6(*ip)), ip.to_string()), - Host::Domain(d) => (None, (*d).to_string()), + Host::Domain(d) => (None, d.as_ref().to_string()), }; if let Some(ip) = ip { @@ -134,6 +166,9 @@ pub enum CustomHttpClientError { domain: Option, ip: IpAddr, }, + Invalid { + domain: String, + }, } impl CustomHttpClientError { @@ -155,7 +190,7 @@ impl fmt::Display for CustomHttpClientError { match self { Self::Blocked { domain, - } => write!(f, "Blocked domain: {domain} matched HTTP_REQUEST_BLOCK_REGEX"), + } => write!(f, "Blocked domain: '{domain}' matched HTTP_REQUEST_BLOCK_REGEX"), Self::NonGlobalIp { domain: Some(domain), ip, @@ -163,7 +198,10 @@ impl fmt::Display for CustomHttpClientError { Self::NonGlobalIp { domain: None, ip, - } => write!(f, "IP {ip} is not a global IP!"), + } => write!(f, "IP '{ip}' is not a global IP!"), + Self::Invalid { + domain, + } => write!(f, "Invalid host: '{domain}' contains invalid characters or exceeds the maximum length"), } } } @@ -217,7 +255,13 @@ impl CustomDnsResolver { } fn pre_resolve(name: &str) -> Result<(), CustomHttpClientError> { - if should_block_address(name) { + let Ok(host) = get_valid_host(name) else { + return Err(CustomHttpClientError::Invalid { + domain: name.to_string(), + }); + }; + + if should_block_host(&host).is_err() { return Err(CustomHttpClientError::Blocked { domain: name.to_string(), }); @@ -308,3 +352,209 @@ pub(crate) mod aws { } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::util::is_global_hardcoded; + use std::net::Ipv4Addr; + use url::Host; + + // === + // IPv4 numeric-format normalization + fn parse_to_ip(s: &str) -> Option { + match Host::parse(s).ok()? { + Host::Ipv4(v4) => Some(IpAddr::V4(v4)), + Host::Ipv6(v6) => Some(IpAddr::V6(v6)), + Host::Domain(_) => None, + } + } + + #[test] + fn dotted_decimal_loopback_normalizes() { + let ip = parse_to_ip("127.0.0.1").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn single_decimal_loopback_normalizes() { + // 127.0.0.1 == 2130706433 + let ip = parse_to_ip("2130706433").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn hex_loopback_normalizes() { + let ip = parse_to_ip("0x7f000001").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn dotted_hex_loopback_normalizes() { + let ip = parse_to_ip("0x7f.0.0.1").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn octal_loopback_normalizes() { + // 017700000001 == 127.0.0.1 + let ip = parse_to_ip("017700000001").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn dotted_octal_loopback_normalizes() { + let ip = parse_to_ip("0177.0.0.01").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn aws_metadata_decimal_blocked() { + // 169.254.169.254 == 2852039166 (link-local, AWS IMDS) + let ip = parse_to_ip("2852039166").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(169, 254, 169, 254))); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn rfc1918_hex_blocked() { + // 10.0.0.1 + let ip = parse_to_ip("0x0a000001").unwrap(); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn public_ip_decimal_allowed() { + // 8.8.8.8 == 134744072 + let ip = parse_to_ip("134744072").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(8, 8, 8, 8))); + assert!(is_global_hardcoded(ip)); + } + + // === + // get_valid_host integration: numeric forms become Host::Ipv4 + #[test] + fn get_valid_host_normalizes_decimal_int() { + let h = get_valid_host("2130706433").expect("valid"); + assert!(matches!(h, Host::Ipv4(ip) if ip == Ipv4Addr::new(127, 0, 0, 1))); + } + + #[test] + fn get_valid_host_normalizes_hex() { + let h = get_valid_host("0x7f000001").expect("valid"); + assert!(matches!(h, Host::Ipv4(ip) if ip == Ipv4Addr::new(127, 0, 0, 1))); + } + + #[test] + fn get_valid_host_normalizes_octal() { + let h = get_valid_host("017700000001").expect("valid"); + assert!(matches!(h, Host::Ipv4(ip) if ip == Ipv4Addr::new(127, 0, 0, 1))); + } + + // === + // IPv6 formats + #[test] + fn ipv6_loopback_blocked() { + let h = get_valid_host("[::1]").expect("valid"); + let Host::Ipv6(ip) = h else { + panic!("expected v6") + }; + assert!(!is_global_hardcoded(IpAddr::V6(ip))); + } + + #[test] + fn ipv4_mapped_in_ipv6_loopback_blocked() { + // ::ffff:127.0.0.1 — v4-mapped form; is_global_hardcoded blocks via ::ffff:0:0/96 + let h = get_valid_host("[::ffff:127.0.0.1]").expect("valid"); + let Host::Ipv6(ip) = h else { + panic!("expected v6") + }; + assert!(!is_global_hardcoded(IpAddr::V6(ip))); + } + + #[test] + fn ipv6_unique_local_blocked() { + let h = get_valid_host("[fc00::1]").expect("valid"); + let Host::Ipv6(ip) = h else { + panic!("expected v6") + }; + assert!(!is_global_hardcoded(IpAddr::V6(ip))); + } + + // === + // Punycode / IDN + #[test] + fn punycode_passthrough() { + let h = get_valid_host("xn--deadbeafcaf-lbb.test").expect("valid"); + match h { + Host::Domain(d) => assert_eq!(d, "xn--deadbeafcaf-lbb.test"), + _ => panic!("expected domain"), + } + } + + #[test] + fn idn_unicode_gets_punycoded() { + let h = get_valid_host("deadbeafcafé.test").expect("valid"); + match h { + Host::Domain(d) => assert_eq!(d, "xn--deadbeafcaf-lbb.test"), + _ => panic!("expected domain"), + } + } + + #[test] + fn idn_unicode_gets_punycoded_tld() { + let h = get_valid_host("deadbeaf.café").expect("valid"); + match h { + Host::Domain(d) => assert_eq!(d, "deadbeaf.xn--caf-dma"), + _ => panic!("expected domain"), + } + } + + #[test] + fn idn_emoji_gets_punycoded() { + let h = get_valid_host("xn--t88h.test").expect("valid"); // 🛡️.test + match h { + Host::Domain(d) => assert_eq!(d, "xn--t88h.test"), + _ => panic!("expected domain"), + } + } + + #[test] + fn idn_unicode_to_punycode_roundtrip() { + let from_unicode = get_valid_host("🛡️.test").expect("valid"); + let from_puny = get_valid_host("xn--t88h.test").expect("valid"); + match (from_unicode, from_puny) { + (Host::Domain(a), Host::Domain(b)) => assert_eq!(a, b), + _ => panic!("expected domains"), + } + } + + #[test] + fn invalid_punycode_rejected() { + // bare invalid punycode + assert!(get_valid_host("xn--").is_err()); + } + + #[test] + fn underscore_in_label_rejected() { + assert!(get_valid_host("dead_beaf.cafe").is_err()); + } + + #[test] + fn label_too_long_rejected() { + let label = "a".repeat(64); + assert!(get_valid_host(&format!("{label}.test")).is_err()); + } + + #[test] + fn domain_too_long_rejected() { + let big = "a.".repeat(130) + "test"; // > 253 + assert!(get_valid_host(&big).is_err()); + } +} diff --git a/src/util.rs b/src/util.rs index 06f00b98..5cd78eed 100644 --- a/src/util.rs +++ b/src/util.rs @@ -818,14 +818,18 @@ pub fn is_global_hardcoded(ip: std::net::IpAddr) -> bool { std::net::IpAddr::V4(ip) => { !(ip.octets()[0] == 0 // "This network" || ip.is_private() - || (ip.octets()[0] == 100 && (ip.octets()[1] & 0b1100_0000 == 0b0100_0000)) //ip.is_shared() + || (ip.octets()[0] == 100 && (ip.octets()[1] & 0b1100_0000 == 0b0100_0000)) // ip.is_shared() || ip.is_loopback() || ip.is_link_local() // addresses reserved for future protocols (`192.0.0.0/24`) - ||(ip.octets()[0] == 192 && ip.octets()[1] == 0 && ip.octets()[2] == 0) + // .9 and .10 are documented as globally reachable so they're excluded + || ( + ip.octets()[0] == 192 && ip.octets()[1] == 0 && ip.octets()[2] == 0 + && ip.octets()[3] != 9 && ip.octets()[3] != 10 + ) || ip.is_documentation() || (ip.octets()[0] == 198 && (ip.octets()[1] & 0xfe) == 18) // ip.is_benchmarking() - || (ip.octets()[0] & 240 == 240 && !ip.is_broadcast()) //ip.is_reserved() + || (ip.octets()[0] & 240 == 240 && !ip.is_broadcast()) // ip.is_reserved() || ip.is_broadcast()) } std::net::IpAddr::V6(ip) => { @@ -849,11 +853,17 @@ pub fn is_global_hardcoded(ip: std::net::IpAddr) -> bool { // AS112-v6 (`2001:4:112::/48`) || matches!(ip.segments(), [0x2001, 4, 0x112, _, _, _, _, _]) // ORCHIDv2 (`2001:20::/28`) - || matches!(ip.segments(), [0x2001, b, _, _, _, _, _, _] if (0x20..=0x2F).contains(&b)) + // Drone Remote ID Protocol Entity Tags (DETs) Prefix (`2001:30::/28`)` + || matches!(ip.segments(), [0x2001, b, _, _, _, _, _, _] if (0x20..=0x3F).contains(&b)) )) - || ((ip.segments()[0] == 0x2001) && (ip.segments()[1] == 0xdb8)) // ip.is_documentation() - || ((ip.segments()[0] & 0xfe00) == 0xfc00) //ip.is_unique_local() - || ((ip.segments()[0] & 0xffc0) == 0xfe80)) //ip.is_unicast_link_local() + // 6to4 (`2002::/16`) – it's not explicitly documented as globally reachable, + // IANA says N/A. + || matches!(ip.segments(), [0x2002, _, _, _, _, _, _, _]) + || matches!(ip.segments(), [0x2001, 0xdb8, ..] | [0x3fff, 0..=0x0fff, ..]) // ip.is_documentation() + // Segment Routing (SRv6) SIDs (`5f00::/16`) + || matches!(ip.segments(), [0x5f00, ..]) + || ip.is_unique_local() + || ip.is_unicast_link_local()) } } }