Fix Host/IP resolving (#7162)

IPv4 addresses can also be in decimal or hex formats.
These were not checked during the Global IP check, and could bypass it.

We now convert everything to the right format before running this check and it will catch these formats.

Also updated the `is_global()` function to match Rust's still unstable version.
And updated the Image Magic checks to be more precise and filter out any possible broken or invalid formats.

While at it, also added several checks to ensure these special formatted IPv4 addresses are still blocked and punycode domains are also correctly resolved.

Signed-off-by: BlackDex <black.dex@gmail.com>
This commit is contained in:
Mathijs van Veluw 2026-04-29 22:20:59 +02:00 committed by GitHub
parent 5cc7360816
commit a354e57659
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 323 additions and 94 deletions

View File

@ -18,7 +18,7 @@ repos:
# When this version is updated, do not forget to update this in `.github/workflows/typos.yaml` too
- repo: https://github.com/crate-ci/typos
rev: cf5f1c29a8ac336af8568821ec41919923b05a83 # v1.45.1
rev: 7c572958218557a3272c2d6719629443b5cc26fd # v1.45.2
hooks:
- id: typos

View File

@ -23,4 +23,6 @@ extend-ignore-re = [
# https://github.com/bitwarden/server/blob/dff9f1cf538198819911cf2c20f8cda3307701c5/src/Notifications/HubHelpers.cs#L86
# https://github.com/bitwarden/clients/blob/9612a4ac45063e372a6fbe87eb253c7cb3c588fb/libs/common/src/auth/services/anonymous-hub.service.ts#L45
"AuthRequestResponseRecieved",
# Ignore Punycode/IDN tests
"xn--.+"
]

View File

@ -19,7 +19,7 @@ use svg_hush::{data_url_filter, Filter};
use crate::{
config::PathType,
error::Error,
http_client::{get_reqwest_client_builder, should_block_address, CustomHttpClientError},
http_client::{get_reqwest_client_builder, get_valid_host, should_block_host, CustomHttpClientError},
util::Cached,
CONFIG,
};
@ -81,19 +81,19 @@ static ICON_SIZE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?x)(\d+
// The function name `icon_external` is checked in the `on_response` function in `AppHeaders`
// It is used to prevent sending a specific header which breaks icon downloads.
// If this function needs to be renamed, also adjust the code in `util.rs`
#[get("/<domain>/icon.png")]
fn icon_external(domain: &str) -> Cached<Option<Redirect>> {
if !is_valid_domain(domain) {
warn!("Invalid domain: {domain}");
#[get("/<host>/icon.png")]
fn icon_external(host: &str) -> Cached<Option<Redirect>> {
let Ok(host) = get_valid_host(host) else {
warn!("Invalid host: {host}");
return Cached::ttl(None, CONFIG.icon_cache_negttl(), true);
};
if should_block_host(&host).is_err() {
warn!("Blocked address: {host}");
return Cached::ttl(None, CONFIG.icon_cache_negttl(), true);
}
if should_block_address(domain) {
warn!("Blocked address: {domain}");
return Cached::ttl(None, CONFIG.icon_cache_negttl(), true);
}
let url = CONFIG._icon_service_url().replace("{}", domain);
let url = CONFIG._icon_service_url().replace("{}", &host.to_string());
let redir = match CONFIG.icon_redirect_code() {
301 => Some(Redirect::moved(url)), // legacy permanent redirect
302 => Some(Redirect::found(url)), // legacy temporary redirect
@ -107,12 +107,21 @@ fn icon_external(domain: &str) -> Cached<Option<Redirect>> {
Cached::ttl(redir, CONFIG.icon_cache_ttl(), true)
}
#[get("/<domain>/icon.png")]
async fn icon_internal(domain: &str) -> Cached<(ContentType, Vec<u8>)> {
#[get("/<host>/icon.png")]
async fn icon_internal(host: &str) -> Cached<(ContentType, Vec<u8>)> {
const FALLBACK_ICON: &[u8] = include_bytes!("../static/images/fallback-icon.png");
if !is_valid_domain(domain) {
warn!("Invalid domain: {domain}");
let Ok(host) = get_valid_host(host) else {
warn!("Invalid host: {host}");
return Cached::ttl(
(ContentType::new("image", "png"), FALLBACK_ICON.to_vec()),
CONFIG.icon_cache_negttl(),
true,
);
};
if should_block_host(&host).is_err() {
warn!("Blocked address: {host}");
return Cached::ttl(
(ContentType::new("image", "png"), FALLBACK_ICON.to_vec()),
CONFIG.icon_cache_negttl(),
@ -120,16 +129,7 @@ async fn icon_internal(domain: &str) -> Cached<(ContentType, Vec<u8>)> {
);
}
if should_block_address(domain) {
warn!("Blocked address: {domain}");
return Cached::ttl(
(ContentType::new("image", "png"), FALLBACK_ICON.to_vec()),
CONFIG.icon_cache_negttl(),
true,
);
}
match get_icon(domain).await {
match get_icon(&host.to_string()).await {
Some((icon, icon_type)) => {
Cached::ttl((ContentType::new("image", icon_type), icon), CONFIG.icon_cache_ttl(), true)
}
@ -137,42 +137,6 @@ async fn icon_internal(domain: &str) -> Cached<(ContentType, Vec<u8>)> {
}
}
/// Returns if the domain provided is valid or not.
///
/// This does some manual checks and makes use of Url to do some basic checking.
/// domains can't be larger then 63 characters (not counting multiple subdomains) according to the RFC's, but we limit the total size to 255.
fn is_valid_domain(domain: &str) -> bool {
const ALLOWED_CHARS: &str = "-.";
// If parsing the domain fails using Url, it will not work with reqwest.
if let Err(parse_error) = url::Url::parse(format!("https://{domain}").as_str()) {
debug!("Domain parse error: '{domain}' - {parse_error:?}");
return false;
} else if domain.is_empty()
|| domain.contains("..")
|| domain.starts_with('.')
|| domain.starts_with('-')
|| domain.ends_with('-')
{
debug!(
"Domain validation error: '{domain}' is either empty, contains '..', starts with an '.', starts or ends with a '-'"
);
return false;
} else if domain.len() > 255 {
debug!("Domain validation error: '{domain}' exceeds 255 characters");
return false;
}
for c in domain.chars() {
if !c.is_alphanumeric() && !ALLOWED_CHARS.contains(c) {
debug!("Domain validation error: '{domain}' contains an invalid character '{c}'");
return false;
}
}
true
}
async fn get_icon(domain: &str) -> Option<(Vec<u8>, String)> {
let path = format!("{domain}.png");
@ -367,7 +331,7 @@ async fn get_icon_url(domain: &str) -> Result<IconUrlResult, Error> {
tld = domain_parts.next_back().unwrap(),
base = domain_parts.next_back().unwrap()
);
if is_valid_domain(&base_domain) {
if get_valid_host(&base_domain).is_ok() {
let sslbase = format!("https://{base_domain}");
let httpbase = format!("http://{base_domain}");
debug!("[get_icon_url]: Trying without subdomains '{base_domain}'");
@ -378,7 +342,7 @@ async fn get_icon_url(domain: &str) -> Result<IconUrlResult, Error> {
// When the domain is not an IP, and has less then 2 dots, try to add www. infront of it.
} else if is_ip.is_err() && domain.matches('.').count() < 2 {
let www_domain = format!("www.{domain}");
if is_valid_domain(&www_domain) {
if get_valid_host(&www_domain).is_ok() {
let sslwww = format!("https://{www_domain}");
let httpwww = format!("http://{www_domain}");
debug!("[get_icon_url]: Trying with www. prefix '{www_domain}'");
@ -618,14 +582,17 @@ fn get_icon_type(bytes: &[u8]) -> Option<&'static str> {
None
}
// Some details can be found here:
// - https://www.garykessler.net/library/file_sigs_GCK_latest.html
// - https://en.wikipedia.org/wiki/List_of_file_signatures
match bytes {
[137, 80, 78, 71, ..] => Some("png"),
[0, 0, 1, 0, ..] => Some("x-icon"),
[82, 73, 70, 70, ..] => Some("webp"),
[255, 216, 255, ..] => Some("jpeg"),
[71, 73, 70, 56, ..] => Some("gif"),
[66, 77, ..] => Some("bmp"),
[60, 115, 118, 103, ..] => Some("svg+xml"), // Normal svg
[137, 80, 78, 71, 13, 10, 26, 10, ..] => Some("png"),
[0, 0, 1, 0, n1, n2, ..] if u16::from_le_bytes([*n1, *n2]) > 0 => Some("x-icon"), // https://en.wikipedia.org/wiki/ICO_(file_format)
[82, 73, 70, 70, _, _, _, _, 87, 69, 66, 80, ..] => Some("webp"), // Only match WebP Images
[255, 216, 255, b, ..] if *b >= 0xC0 => Some("jpeg"),
[71, 73, 70, 56, 55 | 57, 97, ..] => Some("gif"),
[66, 77, _, _, _, _, 0, 0, 0, 0, ..] => Some("bmp"), // https://en.wikipedia.org/wiki/BMP_file_format
[60, 115, 118, 103, ..] => Some("svg+xml"), // Normal svg
[60, 63, 120, 109, 108, ..] => check_svg_after_xml_declaration(bytes), // An svg starting with <?xml
_ => None,
}

View File

@ -1,7 +1,6 @@
use std::{
fmt,
net::{IpAddr, SocketAddr},
str::FromStr,
sync::{Arc, LazyLock, Mutex},
time::Duration,
};
@ -59,16 +58,6 @@ pub fn get_reqwest_client_builder() -> ClientBuilder {
.timeout(Duration::from_secs(10))
}
pub fn should_block_address(domain_or_ip: &str) -> bool {
if let Ok(ip) = IpAddr::from_str(domain_or_ip) {
if should_block_ip(ip) {
return true;
}
}
should_block_address_regex(domain_or_ip)
}
fn should_block_ip(ip: IpAddr) -> bool {
if !CONFIG.http_request_block_non_global_ips() {
return false;
@ -100,11 +89,54 @@ fn should_block_address_regex(domain_or_ip: &str) -> bool {
is_match
}
fn should_block_host(host: &Host<&str>) -> Result<(), CustomHttpClientError> {
pub fn get_valid_host(host: &str) -> Result<Host, CustomHttpClientError> {
let Ok(host) = Host::parse(host) else {
return Err(CustomHttpClientError::Invalid {
domain: host.to_string(),
});
};
// Some extra checks to validate hosts
match host {
Host::Domain(ref domain) => {
// Host::parse() does not verify length or all possible invalid characters
// We do some extra checks here to prevent issues
if domain.len() > 253 {
debug!("Domain validation error: '{domain}' exceeds 253 characters");
return Err(CustomHttpClientError::Invalid {
domain: host.to_string(),
});
}
if !domain.split('.').all(|label| {
!label.is_empty()
// Labels can't be longer than 63 chars
&& label.len() <= 63
// Labels are not allowed to start or end with a hyphen `-`
&& !label.starts_with('-')
&& !label.ends_with('-')
// Only ASCII Alphanumeric characters are allowed
// We already received a punycoded domain back, so no unicode should exists here
&& label.chars().all(|c| c.is_ascii_alphanumeric() || c == '-')
}) {
debug!(
"Domain validation error: '{domain}' labels contain invalid characters or exceed the maximum length"
);
return Err(CustomHttpClientError::Invalid {
domain: host.to_string(),
});
}
}
Host::Ipv4(_) | Host::Ipv6(_) => {}
}
Ok(host)
}
pub fn should_block_host<S: AsRef<str>>(host: &Host<S>) -> Result<(), CustomHttpClientError> {
let (ip, host_str): (Option<IpAddr>, String) = match host {
Host::Ipv4(ip) => (Some(IpAddr::V4(*ip)), ip.to_string()),
Host::Ipv6(ip) => (Some(IpAddr::V6(*ip)), ip.to_string()),
Host::Domain(d) => (None, (*d).to_string()),
Host::Domain(d) => (None, d.as_ref().to_string()),
};
if let Some(ip) = ip {
@ -134,6 +166,9 @@ pub enum CustomHttpClientError {
domain: Option<String>,
ip: IpAddr,
},
Invalid {
domain: String,
},
}
impl CustomHttpClientError {
@ -155,7 +190,7 @@ impl fmt::Display for CustomHttpClientError {
match self {
Self::Blocked {
domain,
} => write!(f, "Blocked domain: {domain} matched HTTP_REQUEST_BLOCK_REGEX"),
} => write!(f, "Blocked domain: '{domain}' matched HTTP_REQUEST_BLOCK_REGEX"),
Self::NonGlobalIp {
domain: Some(domain),
ip,
@ -163,7 +198,10 @@ impl fmt::Display for CustomHttpClientError {
Self::NonGlobalIp {
domain: None,
ip,
} => write!(f, "IP {ip} is not a global IP!"),
} => write!(f, "IP '{ip}' is not a global IP!"),
Self::Invalid {
domain,
} => write!(f, "Invalid host: '{domain}' contains invalid characters or exceeds the maximum length"),
}
}
}
@ -217,7 +255,13 @@ impl CustomDnsResolver {
}
fn pre_resolve(name: &str) -> Result<(), CustomHttpClientError> {
if should_block_address(name) {
let Ok(host) = get_valid_host(name) else {
return Err(CustomHttpClientError::Invalid {
domain: name.to_string(),
});
};
if should_block_host(&host).is_err() {
return Err(CustomHttpClientError::Blocked {
domain: name.to_string(),
});
@ -308,3 +352,209 @@ pub(crate) mod aws {
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::util::is_global_hardcoded;
use std::net::Ipv4Addr;
use url::Host;
// ===
// IPv4 numeric-format normalization
fn parse_to_ip(s: &str) -> Option<IpAddr> {
match Host::parse(s).ok()? {
Host::Ipv4(v4) => Some(IpAddr::V4(v4)),
Host::Ipv6(v6) => Some(IpAddr::V6(v6)),
Host::Domain(_) => None,
}
}
#[test]
fn dotted_decimal_loopback_normalizes() {
let ip = parse_to_ip("127.0.0.1").unwrap();
assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)));
assert!(!is_global_hardcoded(ip));
}
#[test]
fn single_decimal_loopback_normalizes() {
// 127.0.0.1 == 2130706433
let ip = parse_to_ip("2130706433").unwrap();
assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)));
assert!(!is_global_hardcoded(ip));
}
#[test]
fn hex_loopback_normalizes() {
let ip = parse_to_ip("0x7f000001").unwrap();
assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)));
assert!(!is_global_hardcoded(ip));
}
#[test]
fn dotted_hex_loopback_normalizes() {
let ip = parse_to_ip("0x7f.0.0.1").unwrap();
assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)));
assert!(!is_global_hardcoded(ip));
}
#[test]
fn octal_loopback_normalizes() {
// 017700000001 == 127.0.0.1
let ip = parse_to_ip("017700000001").unwrap();
assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)));
assert!(!is_global_hardcoded(ip));
}
#[test]
fn dotted_octal_loopback_normalizes() {
let ip = parse_to_ip("0177.0.0.01").unwrap();
assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)));
assert!(!is_global_hardcoded(ip));
}
#[test]
fn aws_metadata_decimal_blocked() {
// 169.254.169.254 == 2852039166 (link-local, AWS IMDS)
let ip = parse_to_ip("2852039166").unwrap();
assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(169, 254, 169, 254)));
assert!(!is_global_hardcoded(ip));
}
#[test]
fn rfc1918_hex_blocked() {
// 10.0.0.1
let ip = parse_to_ip("0x0a000001").unwrap();
assert!(!is_global_hardcoded(ip));
}
#[test]
fn public_ip_decimal_allowed() {
// 8.8.8.8 == 134744072
let ip = parse_to_ip("134744072").unwrap();
assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(8, 8, 8, 8)));
assert!(is_global_hardcoded(ip));
}
// ===
// get_valid_host integration: numeric forms become Host::Ipv4
#[test]
fn get_valid_host_normalizes_decimal_int() {
let h = get_valid_host("2130706433").expect("valid");
assert!(matches!(h, Host::Ipv4(ip) if ip == Ipv4Addr::new(127, 0, 0, 1)));
}
#[test]
fn get_valid_host_normalizes_hex() {
let h = get_valid_host("0x7f000001").expect("valid");
assert!(matches!(h, Host::Ipv4(ip) if ip == Ipv4Addr::new(127, 0, 0, 1)));
}
#[test]
fn get_valid_host_normalizes_octal() {
let h = get_valid_host("017700000001").expect("valid");
assert!(matches!(h, Host::Ipv4(ip) if ip == Ipv4Addr::new(127, 0, 0, 1)));
}
// ===
// IPv6 formats
#[test]
fn ipv6_loopback_blocked() {
let h = get_valid_host("[::1]").expect("valid");
let Host::Ipv6(ip) = h else {
panic!("expected v6")
};
assert!(!is_global_hardcoded(IpAddr::V6(ip)));
}
#[test]
fn ipv4_mapped_in_ipv6_loopback_blocked() {
// ::ffff:127.0.0.1 — v4-mapped form; is_global_hardcoded blocks via ::ffff:0:0/96
let h = get_valid_host("[::ffff:127.0.0.1]").expect("valid");
let Host::Ipv6(ip) = h else {
panic!("expected v6")
};
assert!(!is_global_hardcoded(IpAddr::V6(ip)));
}
#[test]
fn ipv6_unique_local_blocked() {
let h = get_valid_host("[fc00::1]").expect("valid");
let Host::Ipv6(ip) = h else {
panic!("expected v6")
};
assert!(!is_global_hardcoded(IpAddr::V6(ip)));
}
// ===
// Punycode / IDN
#[test]
fn punycode_passthrough() {
let h = get_valid_host("xn--deadbeafcaf-lbb.test").expect("valid");
match h {
Host::Domain(d) => assert_eq!(d, "xn--deadbeafcaf-lbb.test"),
_ => panic!("expected domain"),
}
}
#[test]
fn idn_unicode_gets_punycoded() {
let h = get_valid_host("deadbeafcafé.test").expect("valid");
match h {
Host::Domain(d) => assert_eq!(d, "xn--deadbeafcaf-lbb.test"),
_ => panic!("expected domain"),
}
}
#[test]
fn idn_unicode_gets_punycoded_tld() {
let h = get_valid_host("deadbeaf.café").expect("valid");
match h {
Host::Domain(d) => assert_eq!(d, "deadbeaf.xn--caf-dma"),
_ => panic!("expected domain"),
}
}
#[test]
fn idn_emoji_gets_punycoded() {
let h = get_valid_host("xn--t88h.test").expect("valid"); // 🛡️.test
match h {
Host::Domain(d) => assert_eq!(d, "xn--t88h.test"),
_ => panic!("expected domain"),
}
}
#[test]
fn idn_unicode_to_punycode_roundtrip() {
let from_unicode = get_valid_host("🛡️.test").expect("valid");
let from_puny = get_valid_host("xn--t88h.test").expect("valid");
match (from_unicode, from_puny) {
(Host::Domain(a), Host::Domain(b)) => assert_eq!(a, b),
_ => panic!("expected domains"),
}
}
#[test]
fn invalid_punycode_rejected() {
// bare invalid punycode
assert!(get_valid_host("xn--").is_err());
}
#[test]
fn underscore_in_label_rejected() {
assert!(get_valid_host("dead_beaf.cafe").is_err());
}
#[test]
fn label_too_long_rejected() {
let label = "a".repeat(64);
assert!(get_valid_host(&format!("{label}.test")).is_err());
}
#[test]
fn domain_too_long_rejected() {
let big = "a.".repeat(130) + "test"; // > 253
assert!(get_valid_host(&big).is_err());
}
}

View File

@ -818,14 +818,18 @@ pub fn is_global_hardcoded(ip: std::net::IpAddr) -> bool {
std::net::IpAddr::V4(ip) => {
!(ip.octets()[0] == 0 // "This network"
|| ip.is_private()
|| (ip.octets()[0] == 100 && (ip.octets()[1] & 0b1100_0000 == 0b0100_0000)) //ip.is_shared()
|| (ip.octets()[0] == 100 && (ip.octets()[1] & 0b1100_0000 == 0b0100_0000)) // ip.is_shared()
|| ip.is_loopback()
|| ip.is_link_local()
// addresses reserved for future protocols (`192.0.0.0/24`)
||(ip.octets()[0] == 192 && ip.octets()[1] == 0 && ip.octets()[2] == 0)
// .9 and .10 are documented as globally reachable so they're excluded
|| (
ip.octets()[0] == 192 && ip.octets()[1] == 0 && ip.octets()[2] == 0
&& ip.octets()[3] != 9 && ip.octets()[3] != 10
)
|| ip.is_documentation()
|| (ip.octets()[0] == 198 && (ip.octets()[1] & 0xfe) == 18) // ip.is_benchmarking()
|| (ip.octets()[0] & 240 == 240 && !ip.is_broadcast()) //ip.is_reserved()
|| (ip.octets()[0] & 240 == 240 && !ip.is_broadcast()) // ip.is_reserved()
|| ip.is_broadcast())
}
std::net::IpAddr::V6(ip) => {
@ -849,11 +853,17 @@ pub fn is_global_hardcoded(ip: std::net::IpAddr) -> bool {
// AS112-v6 (`2001:4:112::/48`)
|| matches!(ip.segments(), [0x2001, 4, 0x112, _, _, _, _, _])
// ORCHIDv2 (`2001:20::/28`)
|| matches!(ip.segments(), [0x2001, b, _, _, _, _, _, _] if (0x20..=0x2F).contains(&b))
// Drone Remote ID Protocol Entity Tags (DETs) Prefix (`2001:30::/28`)`
|| matches!(ip.segments(), [0x2001, b, _, _, _, _, _, _] if (0x20..=0x3F).contains(&b))
))
|| ((ip.segments()[0] == 0x2001) && (ip.segments()[1] == 0xdb8)) // ip.is_documentation()
|| ((ip.segments()[0] & 0xfe00) == 0xfc00) //ip.is_unique_local()
|| ((ip.segments()[0] & 0xffc0) == 0xfe80)) //ip.is_unicast_link_local()
// 6to4 (`2002::/16`) it's not explicitly documented as globally reachable,
// IANA says N/A.
|| matches!(ip.segments(), [0x2002, _, _, _, _, _, _, _])
|| matches!(ip.segments(), [0x2001, 0xdb8, ..] | [0x3fff, 0..=0x0fff, ..]) // ip.is_documentation()
// Segment Routing (SRv6) SIDs (`5f00::/16`)
|| matches!(ip.segments(), [0x5f00, ..])
|| ip.is_unique_local()
|| ip.is_unicast_link_local())
}
}
}