From d889552ac6834518e578df8a2e70f5feed36647f Mon Sep 17 00:00:00 2001 From: yyq1043 Date: Fri, 26 Jun 2026 20:05:48 +0800 Subject: [PATCH 1/4] Add archive.org and archive.is links to profile URL blocks Adds quick-access links to web archives (Wayback Machine and archive.is) in profile URL report blocks, allowing users to view archived snapshots of profile pages that are no longer accessible on the live site but still exist in a web archive. Closes #247 --- maigret/resources/simple_report.tpl | 1 + maigret/resources/simple_report_pdf.tpl | 1 + 2 files changed, 2 insertions(+) diff --git a/maigret/resources/simple_report.tpl b/maigret/resources/simple_report.tpl index c2e3322c8..095d3d54b 100644 --- a/maigret/resources/simple_report.tpl +++ b/maigret/resources/simple_report.tpl @@ -78,6 +78,7 @@ {% endif %}

{{ v.url_user }} + (web.archive.org, archive.is)

{% if v.ids_data %} diff --git a/maigret/resources/simple_report_pdf.tpl b/maigret/resources/simple_report_pdf.tpl index f3db39584..f2f072714 100644 --- a/maigret/resources/simple_report_pdf.tpl +++ b/maigret/resources/simple_report_pdf.tpl @@ -76,6 +76,7 @@ {% endif %}

{{ v.url_user }} + (web.archive.org, archive.is)

{% if v.ids_data %} From 4937dbd2893cdccc030c1129d6ef94e7226a2da2 Mon Sep 17 00:00:00 2001 From: yyq1043 Date: Sat, 27 Jun 2026 18:17:09 +0800 Subject: [PATCH 2/4] Improve blocking pattern detection in submit mode Expand Cloudflare/anti-bot detection to catch more blocking patterns and provide clearer error messages for each type. Addresses the 'filter by errors' TODO from submit mode improvements. --- maigret/submit.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/maigret/submit.py b/maigret/submit.py index 2893c4711..d1ec6f30c 100644 --- a/maigret/submit.py +++ b/maigret/submit.py @@ -217,14 +217,19 @@ async def check_features_manually( ) self.logger.debug(second_html_response) - # TODO: filter by errors, move to dialog function - if ( - "/cdn-cgi/challenge-platform" in first_html_response - or "\t\t\t\tnow: " in first_html_response - or "Sorry, you have been blocked" in first_html_response - ): - self.logger.info("Cloudflare detected, skipping") - return None, None, "Cloudflare detected, skipping", random_username + # Detect blocking patterns and provide helpful error messages + blocking_patterns = [ + ("/cdn-cgi/challenge-platform", "Cloudflare challenge detected"), + ("Now checking your browser", "Cloudflare turnstile detected"), + ("Attention Required! 
| Cloudflare", "Cloudflare blocked"), + ("Sorry, you have been blocked", "Generic blocking page detected"), + ("Access to this page has been denied", "Access denied"), + ("CF-Chl-Alg-List:", "Cloudflare headers present"), + ] + for pattern, message in blocking_patterns: + if pattern in first_html_response: + self.logger.info(f"{message}, skipping") + return None, None, f"{message}, skipping", random_username tokens_a = set(re.split(f'[{self.SEPARATORS}]', first_html_response)) tokens_b = set(re.split(f'[{self.SEPARATORS}]', second_html_response)) From d2a7311dbd903d36d3e173061c7c57211a9ec88f Mon Sep 17 00:00:00 2001 From: yyq1043 Date: Sat, 27 Jun 2026 18:20:41 +0800 Subject: [PATCH 3/4] Include HTTP status codes in blocking detection messages When a site blocks automated access, include the HTTP status codes from the existing/non-existing account responses in the error message for easier debugging. --- maigret/submit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maigret/submit.py b/maigret/submit.py index d1ec6f30c..3cd20fc85 100644 --- a/maigret/submit.py +++ b/maigret/submit.py @@ -228,8 +228,8 @@ async def check_features_manually( ] for pattern, message in blocking_patterns: if pattern in first_html_response: - self.logger.info(f"{message}, skipping") - return None, None, f"{message}, skipping", random_username + self.logger.info(f"{message} (HTTP {first_status}/{second_status}), skipping") + return None, None, f"{message} (HTTP {first_status}/{second_status}), skipping", random_username tokens_a = set(re.split(f'[{self.SEPARATORS}]', first_html_response)) tokens_b = set(re.split(f'[{self.SEPARATORS}]', second_html_response)) From 62d4e9f3d3655fc6682e574b5257817b00e433a6 Mon Sep 17 00:00:00 2001 From: yyq1043 Date: Sat, 27 Jun 2026 18:45:24 +0800 Subject: [PATCH 4/4] Fix broken HTML tag in simple_report_pdf.tpl --- maigret/resources/simple_report_pdf.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/maigret/resources/simple_report_pdf.tpl b/maigret/resources/simple_report_pdf.tpl index f2f072714..6f30103e2 100644 --- a/maigret/resources/simple_report_pdf.tpl +++ b/maigret/resources/simple_report_pdf.tpl @@ -1,4 +1,4 @@ -type="text/css" +