diff --git a/maigret/resources/simple_report.tpl b/maigret/resources/simple_report.tpl
index c2e3322c8..095d3d54b 100644
--- a/maigret/resources/simple_report.tpl
+++ b/maigret/resources/simple_report.tpl
@@ -78,6 +78,7 @@
{% endif %}
{{ v.url_user }}
+ (web.archive.org, archive.is)
{% if v.ids_data %}
diff --git a/maigret/resources/simple_report_pdf.tpl b/maigret/resources/simple_report_pdf.tpl
index f3db39584..6f30103e2 100644
--- a/maigret/resources/simple_report_pdf.tpl
+++ b/maigret/resources/simple_report_pdf.tpl
@@ -1,4 +1,4 @@
-type="text/css"
+
@@ -76,6 +76,7 @@
{% endif %}
{{ v.url_user }}
+ (web.archive.org, archive.is)
{% if v.ids_data %}
diff --git a/maigret/submit.py b/maigret/submit.py
index 2893c4711..3cd20fc85 100644
--- a/maigret/submit.py
+++ b/maigret/submit.py
@@ -217,14 +217,19 @@ async def check_features_manually(
)
self.logger.debug(second_html_response)
- # TODO: filter by errors, move to dialog function
- if (
- "/cdn-cgi/challenge-platform" in first_html_response
- or "\t\t\t\tnow: " in first_html_response
- or "Sorry, you have been blocked" in first_html_response
- ):
- self.logger.info("Cloudflare detected, skipping")
- return None, None, "Cloudflare detected, skipping", random_username
+ # Detect blocking patterns and provide helpful error messages
+ blocking_patterns = [
+ ("/cdn-cgi/challenge-platform", "Cloudflare challenge detected"),
+ ("Now checking your browser", "Cloudflare turnstile detected"),
+ ("Attention Required! | Cloudflare", "Cloudflare blocked"),
+ ("Sorry, you have been blocked", "Generic blocking page detected"),
+ ("Access to this page has been denied", "Access denied"),
+ ("CF-Chl-Alg-List:", "Cloudflare headers present"),
+ ]
+ for pattern, message in blocking_patterns:
+ if pattern in first_html_response:
+ self.logger.info(f"{message} (HTTP {first_status}/{second_status}), skipping")
+ return None, None, f"{message} (HTTP {first_status}/{second_status}), skipping", random_username
tokens_a = set(re.split(f'[{self.SEPARATORS}]', first_html_response))
tokens_b = set(re.split(f'[{self.SEPARATORS}]', second_html_response))