diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b16ae4c6..f10cef65 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # When this version is updated, do not forget to update this in `.github/workflows/typos.yaml` too - repo: https://github.com/crate-ci/typos - rev: cf5f1c29a8ac336af8568821ec41919923b05a83 # v1.45.1 + rev: 7c572958218557a3272c2d6719629443b5cc26fd # v1.45.2 hooks: - id: typos diff --git a/.typos.toml b/.typos.toml index 59f6d7d6..87c0c4a6 100644 --- a/.typos.toml +++ b/.typos.toml @@ -23,4 +23,6 @@ extend-ignore-re = [ # https://github.com/bitwarden/server/blob/dff9f1cf538198819911cf2c20f8cda3307701c5/src/Notifications/HubHelpers.cs#L86 # https://github.com/bitwarden/clients/blob/9612a4ac45063e372a6fbe87eb253c7cb3c588fb/libs/common/src/auth/services/anonymous-hub.service.ts#L45 "AuthRequestResponseRecieved", + # Ignore Punycode/IDN tests + "xn--.+" ] diff --git a/src/api/icons.rs b/src/api/icons.rs index da83d0c4..b3a66f4d 100644 --- a/src/api/icons.rs +++ b/src/api/icons.rs @@ -19,7 +19,7 @@ use svg_hush::{data_url_filter, Filter}; use crate::{ config::PathType, error::Error, - http_client::{get_reqwest_client_builder, should_block_address, CustomHttpClientError}, + http_client::{get_reqwest_client_builder, get_valid_host, should_block_host, CustomHttpClientError}, util::Cached, CONFIG, }; @@ -81,19 +81,19 @@ static ICON_SIZE_REGEX: LazyLock = LazyLock::new(|| Regex::new(r"(?x)(\d+ // The function name `icon_external` is checked in the `on_response` function in `AppHeaders` // It is used to prevent sending a specific header which breaks icon downloads. // If this function needs to be renamed, also adjust the code in `util.rs` -#[get("//icon.png")] -fn icon_external(domain: &str) -> Cached> { - if !is_valid_domain(domain) { - warn!("Invalid domain: {domain}"); +#[get("//icon.png")] +fn icon_external(host: &str) -> Cached> { + let Ok(host) = get_valid_host(host) else { + warn!("Invalid host: {host}"); + return Cached::ttl(None, CONFIG.icon_cache_negttl(), true); + }; + + if should_block_host(&host).is_err() { + warn!("Blocked address: {host}"); return Cached::ttl(None, CONFIG.icon_cache_negttl(), true); } - if should_block_address(domain) { - warn!("Blocked address: {domain}"); - return Cached::ttl(None, CONFIG.icon_cache_negttl(), true); - } - - let url = CONFIG._icon_service_url().replace("{}", domain); + let url = CONFIG._icon_service_url().replace("{}", &host.to_string()); let redir = match CONFIG.icon_redirect_code() { 301 => Some(Redirect::moved(url)), // legacy permanent redirect 302 => Some(Redirect::found(url)), // legacy temporary redirect @@ -107,12 +107,21 @@ fn icon_external(domain: &str) -> Cached> { Cached::ttl(redir, CONFIG.icon_cache_ttl(), true) } -#[get("//icon.png")] -async fn icon_internal(domain: &str) -> Cached<(ContentType, Vec)> { +#[get("//icon.png")] +async fn icon_internal(host: &str) -> Cached<(ContentType, Vec)> { const FALLBACK_ICON: &[u8] = include_bytes!("../static/images/fallback-icon.png"); - if !is_valid_domain(domain) { - warn!("Invalid domain: {domain}"); + let Ok(host) = get_valid_host(host) else { + warn!("Invalid host: {host}"); + return Cached::ttl( + (ContentType::new("image", "png"), FALLBACK_ICON.to_vec()), + CONFIG.icon_cache_negttl(), + true, + ); + }; + + if should_block_host(&host).is_err() { + warn!("Blocked address: {host}"); return Cached::ttl( (ContentType::new("image", "png"), FALLBACK_ICON.to_vec()), CONFIG.icon_cache_negttl(), @@ -120,16 +129,7 @@ async fn icon_internal(domain: &str) -> Cached<(ContentType, Vec)> { ); } - if should_block_address(domain) { - warn!("Blocked address: {domain}"); - return Cached::ttl( - (ContentType::new("image", "png"), FALLBACK_ICON.to_vec()), - CONFIG.icon_cache_negttl(), - true, - ); - } - - match get_icon(domain).await { + match get_icon(&host.to_string()).await { Some((icon, icon_type)) => { Cached::ttl((ContentType::new("image", icon_type), icon), CONFIG.icon_cache_ttl(), true) } @@ -137,42 +137,6 @@ async fn icon_internal(domain: &str) -> Cached<(ContentType, Vec)> { } } -/// Returns if the domain provided is valid or not. -/// -/// This does some manual checks and makes use of Url to do some basic checking. -/// domains can't be larger then 63 characters (not counting multiple subdomains) according to the RFC's, but we limit the total size to 255. -fn is_valid_domain(domain: &str) -> bool { - const ALLOWED_CHARS: &str = "-."; - - // If parsing the domain fails using Url, it will not work with reqwest. - if let Err(parse_error) = url::Url::parse(format!("https://{domain}").as_str()) { - debug!("Domain parse error: '{domain}' - {parse_error:?}"); - return false; - } else if domain.is_empty() - || domain.contains("..") - || domain.starts_with('.') - || domain.starts_with('-') - || domain.ends_with('-') - { - debug!( - "Domain validation error: '{domain}' is either empty, contains '..', starts with an '.', starts or ends with a '-'" - ); - return false; - } else if domain.len() > 255 { - debug!("Domain validation error: '{domain}' exceeds 255 characters"); - return false; - } - - for c in domain.chars() { - if !c.is_alphanumeric() && !ALLOWED_CHARS.contains(c) { - debug!("Domain validation error: '{domain}' contains an invalid character '{c}'"); - return false; - } - } - - true -} - async fn get_icon(domain: &str) -> Option<(Vec, String)> { let path = format!("{domain}.png"); @@ -367,7 +331,7 @@ async fn get_icon_url(domain: &str) -> Result { tld = domain_parts.next_back().unwrap(), base = domain_parts.next_back().unwrap() ); - if is_valid_domain(&base_domain) { + if get_valid_host(&base_domain).is_ok() { let sslbase = format!("https://{base_domain}"); let httpbase = format!("http://{base_domain}"); debug!("[get_icon_url]: Trying without subdomains '{base_domain}'"); @@ -378,7 +342,7 @@ async fn get_icon_url(domain: &str) -> Result { // When the domain is not an IP, and has less then 2 dots, try to add www. infront of it. } else if is_ip.is_err() && domain.matches('.').count() < 2 { let www_domain = format!("www.{domain}"); - if is_valid_domain(&www_domain) { + if get_valid_host(&www_domain).is_ok() { let sslwww = format!("https://{www_domain}"); let httpwww = format!("http://{www_domain}"); debug!("[get_icon_url]: Trying with www. prefix '{www_domain}'"); @@ -618,14 +582,17 @@ fn get_icon_type(bytes: &[u8]) -> Option<&'static str> { None } + // Some details can be found here: + // - https://www.garykessler.net/library/file_sigs_GCK_latest.html + // - https://en.wikipedia.org/wiki/List_of_file_signatures match bytes { - [137, 80, 78, 71, ..] => Some("png"), - [0, 0, 1, 0, ..] => Some("x-icon"), - [82, 73, 70, 70, ..] => Some("webp"), - [255, 216, 255, ..] => Some("jpeg"), - [71, 73, 70, 56, ..] => Some("gif"), - [66, 77, ..] => Some("bmp"), - [60, 115, 118, 103, ..] => Some("svg+xml"), // Normal svg + [137, 80, 78, 71, 13, 10, 26, 10, ..] => Some("png"), + [0, 0, 1, 0, n1, n2, ..] if u16::from_le_bytes([*n1, *n2]) > 0 => Some("x-icon"), // https://en.wikipedia.org/wiki/ICO_(file_format) + [82, 73, 70, 70, _, _, _, _, 87, 69, 66, 80, ..] => Some("webp"), // Only match WebP Images + [255, 216, 255, b, ..] if *b >= 0xC0 => Some("jpeg"), + [71, 73, 70, 56, 55 | 57, 97, ..] => Some("gif"), + [66, 77, _, _, _, _, 0, 0, 0, 0, ..] => Some("bmp"), // https://en.wikipedia.org/wiki/BMP_file_format + [60, 115, 118, 103, ..] => Some("svg+xml"), // Normal svg [60, 63, 120, 109, 108, ..] => check_svg_after_xml_declaration(bytes), // An svg starting with None, } diff --git a/src/http_client.rs b/src/http_client.rs index df52e2bc..d39b884d 100644 --- a/src/http_client.rs +++ b/src/http_client.rs @@ -1,7 +1,6 @@ use std::{ fmt, net::{IpAddr, SocketAddr}, - str::FromStr, sync::{Arc, LazyLock, Mutex}, time::Duration, }; @@ -59,16 +58,6 @@ pub fn get_reqwest_client_builder() -> ClientBuilder { .timeout(Duration::from_secs(10)) } -pub fn should_block_address(domain_or_ip: &str) -> bool { - if let Ok(ip) = IpAddr::from_str(domain_or_ip) { - if should_block_ip(ip) { - return true; - } - } - - should_block_address_regex(domain_or_ip) -} - fn should_block_ip(ip: IpAddr) -> bool { if !CONFIG.http_request_block_non_global_ips() { return false; @@ -100,11 +89,54 @@ fn should_block_address_regex(domain_or_ip: &str) -> bool { is_match } -fn should_block_host(host: &Host<&str>) -> Result<(), CustomHttpClientError> { +pub fn get_valid_host(host: &str) -> Result { + let Ok(host) = Host::parse(host) else { + return Err(CustomHttpClientError::Invalid { + domain: host.to_string(), + }); + }; + + // Some extra checks to validate hosts + match host { + Host::Domain(ref domain) => { + // Host::parse() does not verify length or all possible invalid characters + // We do some extra checks here to prevent issues + if domain.len() > 253 { + debug!("Domain validation error: '{domain}' exceeds 253 characters"); + return Err(CustomHttpClientError::Invalid { + domain: host.to_string(), + }); + } + if !domain.split('.').all(|label| { + !label.is_empty() + // Labels can't be longer than 63 chars + && label.len() <= 63 + // Labels are not allowed to start or end with a hyphen `-` + && !label.starts_with('-') + && !label.ends_with('-') + // Only ASCII Alphanumeric characters are allowed + // We already received a punycoded domain back, so no unicode should exists here + && label.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') + }) { + debug!( + "Domain validation error: '{domain}' labels contain invalid characters or exceed the maximum length" + ); + return Err(CustomHttpClientError::Invalid { + domain: host.to_string(), + }); + } + } + Host::Ipv4(_) | Host::Ipv6(_) => {} + } + + Ok(host) +} + +pub fn should_block_host>(host: &Host) -> Result<(), CustomHttpClientError> { let (ip, host_str): (Option, String) = match host { Host::Ipv4(ip) => (Some(IpAddr::V4(*ip)), ip.to_string()), Host::Ipv6(ip) => (Some(IpAddr::V6(*ip)), ip.to_string()), - Host::Domain(d) => (None, (*d).to_string()), + Host::Domain(d) => (None, d.as_ref().to_string()), }; if let Some(ip) = ip { @@ -134,6 +166,9 @@ pub enum CustomHttpClientError { domain: Option, ip: IpAddr, }, + Invalid { + domain: String, + }, } impl CustomHttpClientError { @@ -155,7 +190,7 @@ impl fmt::Display for CustomHttpClientError { match self { Self::Blocked { domain, - } => write!(f, "Blocked domain: {domain} matched HTTP_REQUEST_BLOCK_REGEX"), + } => write!(f, "Blocked domain: '{domain}' matched HTTP_REQUEST_BLOCK_REGEX"), Self::NonGlobalIp { domain: Some(domain), ip, @@ -163,7 +198,10 @@ impl fmt::Display for CustomHttpClientError { Self::NonGlobalIp { domain: None, ip, - } => write!(f, "IP {ip} is not a global IP!"), + } => write!(f, "IP '{ip}' is not a global IP!"), + Self::Invalid { + domain, + } => write!(f, "Invalid host: '{domain}' contains invalid characters or exceeds the maximum length"), } } } @@ -217,7 +255,13 @@ impl CustomDnsResolver { } fn pre_resolve(name: &str) -> Result<(), CustomHttpClientError> { - if should_block_address(name) { + let Ok(host) = get_valid_host(name) else { + return Err(CustomHttpClientError::Invalid { + domain: name.to_string(), + }); + }; + + if should_block_host(&host).is_err() { return Err(CustomHttpClientError::Blocked { domain: name.to_string(), }); @@ -308,3 +352,209 @@ pub(crate) mod aws { } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::util::is_global_hardcoded; + use std::net::Ipv4Addr; + use url::Host; + + // === + // IPv4 numeric-format normalization + fn parse_to_ip(s: &str) -> Option { + match Host::parse(s).ok()? { + Host::Ipv4(v4) => Some(IpAddr::V4(v4)), + Host::Ipv6(v6) => Some(IpAddr::V6(v6)), + Host::Domain(_) => None, + } + } + + #[test] + fn dotted_decimal_loopback_normalizes() { + let ip = parse_to_ip("127.0.0.1").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn single_decimal_loopback_normalizes() { + // 127.0.0.1 == 2130706433 + let ip = parse_to_ip("2130706433").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn hex_loopback_normalizes() { + let ip = parse_to_ip("0x7f000001").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn dotted_hex_loopback_normalizes() { + let ip = parse_to_ip("0x7f.0.0.1").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn octal_loopback_normalizes() { + // 017700000001 == 127.0.0.1 + let ip = parse_to_ip("017700000001").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn dotted_octal_loopback_normalizes() { + let ip = parse_to_ip("0177.0.0.01").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn aws_metadata_decimal_blocked() { + // 169.254.169.254 == 2852039166 (link-local, AWS IMDS) + let ip = parse_to_ip("2852039166").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(169, 254, 169, 254))); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn rfc1918_hex_blocked() { + // 10.0.0.1 + let ip = parse_to_ip("0x0a000001").unwrap(); + assert!(!is_global_hardcoded(ip)); + } + + #[test] + fn public_ip_decimal_allowed() { + // 8.8.8.8 == 134744072 + let ip = parse_to_ip("134744072").unwrap(); + assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(8, 8, 8, 8))); + assert!(is_global_hardcoded(ip)); + } + + // === + // get_valid_host integration: numeric forms become Host::Ipv4 + #[test] + fn get_valid_host_normalizes_decimal_int() { + let h = get_valid_host("2130706433").expect("valid"); + assert!(matches!(h, Host::Ipv4(ip) if ip == Ipv4Addr::new(127, 0, 0, 1))); + } + + #[test] + fn get_valid_host_normalizes_hex() { + let h = get_valid_host("0x7f000001").expect("valid"); + assert!(matches!(h, Host::Ipv4(ip) if ip == Ipv4Addr::new(127, 0, 0, 1))); + } + + #[test] + fn get_valid_host_normalizes_octal() { + let h = get_valid_host("017700000001").expect("valid"); + assert!(matches!(h, Host::Ipv4(ip) if ip == Ipv4Addr::new(127, 0, 0, 1))); + } + + // === + // IPv6 formats + #[test] + fn ipv6_loopback_blocked() { + let h = get_valid_host("[::1]").expect("valid"); + let Host::Ipv6(ip) = h else { + panic!("expected v6") + }; + assert!(!is_global_hardcoded(IpAddr::V6(ip))); + } + + #[test] + fn ipv4_mapped_in_ipv6_loopback_blocked() { + // ::ffff:127.0.0.1 — v4-mapped form; is_global_hardcoded blocks via ::ffff:0:0/96 + let h = get_valid_host("[::ffff:127.0.0.1]").expect("valid"); + let Host::Ipv6(ip) = h else { + panic!("expected v6") + }; + assert!(!is_global_hardcoded(IpAddr::V6(ip))); + } + + #[test] + fn ipv6_unique_local_blocked() { + let h = get_valid_host("[fc00::1]").expect("valid"); + let Host::Ipv6(ip) = h else { + panic!("expected v6") + }; + assert!(!is_global_hardcoded(IpAddr::V6(ip))); + } + + // === + // Punycode / IDN + #[test] + fn punycode_passthrough() { + let h = get_valid_host("xn--deadbeafcaf-lbb.test").expect("valid"); + match h { + Host::Domain(d) => assert_eq!(d, "xn--deadbeafcaf-lbb.test"), + _ => panic!("expected domain"), + } + } + + #[test] + fn idn_unicode_gets_punycoded() { + let h = get_valid_host("deadbeafcafé.test").expect("valid"); + match h { + Host::Domain(d) => assert_eq!(d, "xn--deadbeafcaf-lbb.test"), + _ => panic!("expected domain"), + } + } + + #[test] + fn idn_unicode_gets_punycoded_tld() { + let h = get_valid_host("deadbeaf.café").expect("valid"); + match h { + Host::Domain(d) => assert_eq!(d, "deadbeaf.xn--caf-dma"), + _ => panic!("expected domain"), + } + } + + #[test] + fn idn_emoji_gets_punycoded() { + let h = get_valid_host("xn--t88h.test").expect("valid"); // 🛡️.test + match h { + Host::Domain(d) => assert_eq!(d, "xn--t88h.test"), + _ => panic!("expected domain"), + } + } + + #[test] + fn idn_unicode_to_punycode_roundtrip() { + let from_unicode = get_valid_host("🛡️.test").expect("valid"); + let from_puny = get_valid_host("xn--t88h.test").expect("valid"); + match (from_unicode, from_puny) { + (Host::Domain(a), Host::Domain(b)) => assert_eq!(a, b), + _ => panic!("expected domains"), + } + } + + #[test] + fn invalid_punycode_rejected() { + // bare invalid punycode + assert!(get_valid_host("xn--").is_err()); + } + + #[test] + fn underscore_in_label_rejected() { + assert!(get_valid_host("dead_beaf.cafe").is_err()); + } + + #[test] + fn label_too_long_rejected() { + let label = "a".repeat(64); + assert!(get_valid_host(&format!("{label}.test")).is_err()); + } + + #[test] + fn domain_too_long_rejected() { + let big = "a.".repeat(130) + "test"; // > 253 + assert!(get_valid_host(&big).is_err()); + } +} diff --git a/src/util.rs b/src/util.rs index 06f00b98..5cd78eed 100644 --- a/src/util.rs +++ b/src/util.rs @@ -818,14 +818,18 @@ pub fn is_global_hardcoded(ip: std::net::IpAddr) -> bool { std::net::IpAddr::V4(ip) => { !(ip.octets()[0] == 0 // "This network" || ip.is_private() - || (ip.octets()[0] == 100 && (ip.octets()[1] & 0b1100_0000 == 0b0100_0000)) //ip.is_shared() + || (ip.octets()[0] == 100 && (ip.octets()[1] & 0b1100_0000 == 0b0100_0000)) // ip.is_shared() || ip.is_loopback() || ip.is_link_local() // addresses reserved for future protocols (`192.0.0.0/24`) - ||(ip.octets()[0] == 192 && ip.octets()[1] == 0 && ip.octets()[2] == 0) + // .9 and .10 are documented as globally reachable so they're excluded + || ( + ip.octets()[0] == 192 && ip.octets()[1] == 0 && ip.octets()[2] == 0 + && ip.octets()[3] != 9 && ip.octets()[3] != 10 + ) || ip.is_documentation() || (ip.octets()[0] == 198 && (ip.octets()[1] & 0xfe) == 18) // ip.is_benchmarking() - || (ip.octets()[0] & 240 == 240 && !ip.is_broadcast()) //ip.is_reserved() + || (ip.octets()[0] & 240 == 240 && !ip.is_broadcast()) // ip.is_reserved() || ip.is_broadcast()) } std::net::IpAddr::V6(ip) => { @@ -849,11 +853,17 @@ pub fn is_global_hardcoded(ip: std::net::IpAddr) -> bool { // AS112-v6 (`2001:4:112::/48`) || matches!(ip.segments(), [0x2001, 4, 0x112, _, _, _, _, _]) // ORCHIDv2 (`2001:20::/28`) - || matches!(ip.segments(), [0x2001, b, _, _, _, _, _, _] if (0x20..=0x2F).contains(&b)) + // Drone Remote ID Protocol Entity Tags (DETs) Prefix (`2001:30::/28`)` + || matches!(ip.segments(), [0x2001, b, _, _, _, _, _, _] if (0x20..=0x3F).contains(&b)) )) - || ((ip.segments()[0] == 0x2001) && (ip.segments()[1] == 0xdb8)) // ip.is_documentation() - || ((ip.segments()[0] & 0xfe00) == 0xfc00) //ip.is_unique_local() - || ((ip.segments()[0] & 0xffc0) == 0xfe80)) //ip.is_unicast_link_local() + // 6to4 (`2002::/16`) – it's not explicitly documented as globally reachable, + // IANA says N/A. + || matches!(ip.segments(), [0x2002, _, _, _, _, _, _, _]) + || matches!(ip.segments(), [0x2001, 0xdb8, ..] | [0x3fff, 0..=0x0fff, ..]) // ip.is_documentation() + // Segment Routing (SRv6) SIDs (`5f00::/16`) + || matches!(ip.segments(), [0x5f00, ..]) + || ip.is_unique_local() + || ip.is_unicast_link_local()) } } }