Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions maigret/resources/simple_report.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
{% endif %}
<p class="card-text">
<a href="{{ v.url_user }}" target="_blank">{{ v.url_user }}</a>
<span class="text-muted small">(<a href="https://web.archive.org/web/*/{{ v.url_user }}" target="_blank">web.archive.org</a>, <a href="https://archive.is/newest/{{ v.url_user }}" target="_blank">archive.is</a>)</span>
</p>
{% if v.ids_data %}
<table class="table table-striped">
Expand Down
3 changes: 2 additions & 1 deletion maigret/resources/simple_report_pdf.tpl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<html>type="text/css"
<html>
<head>
<meta charset="utf-8" />
</head>
Expand Down Expand Up @@ -76,6 +76,7 @@
{% endif %}
<p class="card-text">
<a href="{{ v.url_user }}" target="_blank">{{ v.url_user }}</a>
<span class="text-muted small">(<a href="https://web.archive.org/web/*/{{ v.url_user }}" target="_blank">web.archive.org</a>, <a href="https://archive.is/newest/{{ v.url_user }}" target="_blank">archive.is</a>)</span>
</p>
</div>
{% if v.ids_data %}
Expand Down
21 changes: 13 additions & 8 deletions maigret/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,19 @@ async def check_features_manually(
)
self.logger.debug(second_html_response)

# TODO: filter by errors, move to dialog function
if (
"/cdn-cgi/challenge-platform" in first_html_response
or "\t\t\t\tnow: " in first_html_response
or "Sorry, you have been blocked" in first_html_response
):
self.logger.info("Cloudflare detected, skipping")
return None, None, "Cloudflare detected, skipping", random_username
# Detect blocking patterns and provide helpful error messages
blocking_patterns = [
("/cdn-cgi/challenge-platform", "Cloudflare challenge detected"),
("Now checking your browser", "Cloudflare turnstile detected"),
("Attention Required! | Cloudflare", "Cloudflare blocked"),
("Sorry, you have been blocked", "Generic blocking page detected"),
("Access to this page has been denied", "Access denied"),
("CF-Chl-Alg-List:", "Cloudflare headers present"),
]
for pattern, message in blocking_patterns:
if pattern in first_html_response:
self.logger.info(f"{message} (HTTP {first_status}/{second_status}), skipping")
return None, None, f"{message} (HTTP {first_status}/{second_status}), skipping", random_username

tokens_a = set(re.split(f'[{self.SEPARATORS}]', first_html_response))
tokens_b = set(re.split(f'[{self.SEPARATORS}]', second_html_response))
Expand Down