Replace hardcoded 300 KB limit on spider size with a config option defaulting to 1 MB
Modern websites are sadly massive, so 300 KB is pretty low; 1 MB should be enough. Signed-off-by: strawberry <strawberry@puppygock.gay>
This commit is contained in:
parent
2ea895199a
commit
48e4b71dd1
|
@ -180,6 +180,9 @@ url_preview_domain_explicit_allowlist = []
|
|||
# Setting this to "*" will allow all URL previews. Please note that this opens up significant attack surface to your server; you are expected to be aware of the risks of doing so.
|
||||
url_preview_url_contains_allowlist = []
|
||||
|
||||
# Maximum size, in bytes, of a URL preview response body to read when spidering. Defaults to 1MB (1_000_000 bytes)
|
||||
url_preview_max_spider_size = 1_000_000
|
||||
|
||||
|
||||
|
||||
### Misc
|
||||
|
|
|
@ -342,14 +342,13 @@ async fn download_image(client: &reqwest::Client, url: &str) -> Result<UrlPrevie
|
|||
}
|
||||
|
||||
async fn download_html(client: &reqwest::Client, url: &str) -> Result<UrlPreviewData> {
|
||||
let max_download_size = 300_000; // TODO: is this bytes? kilobytes? megabytes?
|
||||
|
||||
let mut response = client.get(url).send().await?;
|
||||
|
||||
let mut bytes: Vec<u8> = Vec::new();
|
||||
while let Some(chunk) = response.chunk().await? {
|
||||
bytes.extend_from_slice(&chunk);
|
||||
if bytes.len() > max_download_size {
|
||||
if bytes.len() > services().globals.url_preview_max_spider_size() {
|
||||
debug!("Response body from URL {} exceeds url_preview_max_spider_size ({}), not processing the rest of the response body and assuming our necessary data is in this range.", url, services().globals.url_preview_max_spider_size());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -136,12 +136,12 @@ pub struct Config {
|
|||
|
||||
#[serde(default = "Vec::new")]
|
||||
pub url_preview_domain_contains_allowlist: Vec<String>,
|
||||
|
||||
#[serde(default = "Vec::new")]
|
||||
pub url_preview_domain_explicit_allowlist: Vec<String>,
|
||||
|
||||
#[serde(default = "Vec::new")]
|
||||
pub url_preview_url_contains_allowlist: Vec<String>,
|
||||
#[serde(default = "default_url_preview_max_spider_size")]
|
||||
pub url_preview_max_spider_size: usize,
|
||||
|
||||
#[serde(default = "RegexSet::empty")]
|
||||
#[serde(with = "serde_regex")]
|
||||
|
@ -370,6 +370,10 @@ impl fmt::Display for Config {
|
|||
"URL preview URL contains allowlist",
|
||||
&self.url_preview_url_contains_allowlist.join(", "),
|
||||
),
|
||||
(
|
||||
"URL preview maximum spider size",
|
||||
&self.url_preview_max_spider_size.to_string(),
|
||||
),
|
||||
];
|
||||
|
||||
let mut msg: String = "Active config values:\n\n".to_owned();
|
||||
|
@ -495,3 +499,7 @@ fn default_ip_range_denylist() -> Vec<String> {
|
|||
"fec0::/10".to_owned(),
|
||||
]
|
||||
}
|
||||
|
||||
fn default_url_preview_max_spider_size() -> usize {
|
||||
1_000_000 // 1MB
|
||||
}
|
||||
|
|
|
@ -412,6 +412,10 @@ impl Service<'_> {
|
|||
&self.config.url_preview_url_contains_allowlist
|
||||
}
|
||||
|
||||
pub fn url_preview_max_spider_size(&self) -> usize {
|
||||
self.config.url_preview_max_spider_size
|
||||
}
|
||||
|
||||
pub fn forbidden_room_names(&self) -> &RegexSet {
|
||||
&self.config.forbidden_room_names
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue