From 26258c03c57b2fd129df42db1841e66f43af37d5 Mon Sep 17 00:00:00 2001 From: timon0305 Date: Mon, 4 May 2026 23:24:38 +0200 Subject: [PATCH 1/3] fix: gate Flask debug / Werkzeug debugger behind opt-in flag (closes #9) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Werkzeug debugger is a documented remote-code-execution primitive. app.py was hard-coding `debug=True`, which exposed RCE to anyone who could reach the listening port — a misconfigured `--host`, an SSH tunnel, or a careless reverse proxy was enough. - Remove the `debug=True` literal from app.py. - Default debug OFF. Opt-in via either `--debug` CLI flag or `FLASK_DEBUG=1` env var (truthy = "1" / "true" / "yes", case-insensitive, whitespace-tolerant). - Print a stderr WARNING when debug is enabled, naming the RCE risk and reminding the operator to bind only to loopback. - Gate the auto-reloader on the same flag. Live-tested all four matrix cells: (default off / --debug / FLASK_DEBUG=1 / FLASK_DEBUG=0). Bogus paths under debug-off return a plain Flask 404, not the Werkzeug debugger console. Helper `resolve_debug_flag(env_value, cli_flag)` lives in `utils/debug_flag.py` so it can be unit-tested without importing Flask (matching the existing test convention in tests/test_cli_args.py). Regression coverage in tests/test_cli_args.py adds 8 cases: - default-off, env-truthy, env-falsey, CLI override - argparse `--debug` default + explicit - source-level guard that fails if `debug=True` is reintroduced --- app.py | 27 ++++++++++++++++++-- tests/test_cli_args.py | 58 ++++++++++++++++++++++++++++++++++++++++++ utils/debug_flag.py | 21 +++++++++++++++ 3 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 utils/debug_flag.py diff --git a/app.py b/app.py index 5b43f9d..338f83e 100644 --- a/app.py +++ b/app.py @@ -4,12 +4,15 @@ from the Cursor editor's AI chat feature. 
""" +import os import sys from datetime import datetime from pathlib import Path from flask import Flask, render_template, send_from_directory +from utils.debug_flag import resolve_debug_flag + from api.workspaces import bp as workspaces_bp from api.composers import bp as composers_bp from api.logs import bp as logs_bp @@ -101,6 +104,13 @@ def favicon(): help="Path to exclusion rules file (sensitive projects/chats are omitted). " "If omitted, uses ~/.cursor-chat-browser/exclusion-rules.txt if present.", ) + parser.add_argument( + "--debug", + action="store_true", + help="Enable Flask debug mode and the Werkzeug debugger. " + "DANGEROUS: allows remote code execution if the port is exposed. " + "Off by default; can also be enabled via FLASK_DEBUG=1.", + ) args = parser.parse_args() if args.base_dir: @@ -109,10 +119,23 @@ def favicon(): app = create_app(exclusion_rules_path=args.exclude_rules) print(f"Cursor Chat Browser (Python) running at http://{args.host}:{args.port}") + + debug_enabled = resolve_debug_flag(os.environ.get("FLASK_DEBUG"), args.debug) + if debug_enabled: + # Print the warning to stderr so it's visible even when stdout is + # piped/redirected. The Werkzeug debugger is a remote-code-execution + # primitive — anyone reaching the host:port can hijack the process. + print( + "WARNING: Flask debug mode ENABLED. The Werkzeug debugger allows " + "arbitrary code execution by anyone who can reach this server. " + "Bind only to 127.0.0.1 and never expose to untrusted networks.", + file=sys.stderr, + ) + # Disable reloader on Windows to avoid a socket conflict with Flask's stat reloader. 
app.run( host=args.host, port=args.port, - debug=True, - use_reloader=(sys.platform != "win32"), + debug=debug_enabled, + use_reloader=debug_enabled and (sys.platform != "win32"), ) diff --git a/tests/test_cli_args.py b/tests/test_cli_args.py index 02438ba..cfe0041 100644 --- a/tests/test_cli_args.py +++ b/tests/test_cli_args.py @@ -43,6 +43,7 @@ def _build_app_parser(): parser.add_argument("--base-dir", default=None) parser.add_argument("--exclude-rules", "-e", default=None, metavar="PATH", dest="exclude_rules") + parser.add_argument("--debug", action="store_true") return parser @@ -246,5 +247,62 @@ def test_export_py_has_since_choices(self): self.assertIn('choices=["all", "last"]', src) +# --------------------------------------------------------------------------- +# Werkzeug debugger gating (security): debug must be off by default, +# opt-in via --debug or FLASK_DEBUG=1. Regression for the Critical +# `debug=True` exposure that was hard-coded in app.py. +# --------------------------------------------------------------------------- + +class TestDebugFlagGating(unittest.TestCase): + + # -- resolve_debug_flag helper ------------------------------------------- + + def setUp(self): + # Import from the standalone utility module so the test does not pull + # Flask into scope (the rest of this file deliberately avoids Flask). 
+ from utils.debug_flag import resolve_debug_flag + self._resolve = resolve_debug_flag + + def test_debug_off_when_env_unset_and_no_cli(self): + self.assertFalse(self._resolve(None, False)) + + def test_debug_off_when_env_empty_string(self): + self.assertFalse(self._resolve("", False)) + + def test_debug_off_for_explicit_falsey_env_values(self): + for v in ("0", "false", "False", "no", "off", "anything-not-truthy"): + with self.subTest(env=v): + self.assertFalse(self._resolve(v, False)) + + def test_debug_on_for_truthy_env_values(self): + for v in ("1", "true", "True", "TRUE", "yes", "YES", " 1 "): + with self.subTest(env=v): + self.assertTrue(self._resolve(v, False)) + + def test_cli_flag_overrides_env(self): + # Even with FLASK_DEBUG explicitly off, --debug should turn it on. + self.assertTrue(self._resolve("0", True)) + self.assertTrue(self._resolve(None, True)) + + # -- argparse: --debug flag ---------------------------------------------- + + def test_app_parser_debug_default_false(self): + opts = _build_app_parser().parse_args([]) + self.assertFalse(opts.debug) + + def test_app_parser_debug_explicit(self): + opts = _build_app_parser().parse_args(["--debug"]) + self.assertTrue(opts.debug) + + # -- source-level guard: app.py must NOT carry debug=True ---------------- + # If a future edit re-introduces the literal it'll be caught here. + + def test_app_py_does_not_hardcode_debug_true(self): + app_path = os.path.join(REPO_ROOT, "app.py") + with open(app_path, "r", encoding="utf-8") as f: + src = f.read() + self.assertNotIn("debug=True", src) + + if __name__ == "__main__": unittest.main() diff --git a/utils/debug_flag.py b/utils/debug_flag.py new file mode 100644 index 0000000..ba607f1 --- /dev/null +++ b/utils/debug_flag.py @@ -0,0 +1,21 @@ +"""Resolution of the Flask debug / Werkzeug debugger flag. + +Lives in `utils/` so it can be unit-tested without importing Flask +(which the test suite intentionally avoids — see tests/test_cli_args.py). 
+""" + + +def resolve_debug_flag(env_value, cli_flag): + """Return True iff Flask debug / Werkzeug debugger should be enabled. + + Off by default. The Werkzeug debugger lets a remote attacker execute + arbitrary Python in the server process, so debug mode must be opt-in + and never the default. Enabled only when: + - the operator explicitly passes --debug on the command line, or + - FLASK_DEBUG is set to a truthy value ("1", "true", "yes"). + """ + if cli_flag: + return True + if env_value is None: + return False + return env_value.strip().lower() in ("1", "true", "yes") From c2b4a21d022c4ebc4feb9516f1a916170fda4284 Mon Sep 17 00:00:00 2001 From: timon0305 Date: Tue, 5 May 2026 14:20:40 +0200 Subject: [PATCH 2/3] test: AST-walk the debug=True regression guard (CodeRabbit on PR #10) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Old guard: `self.assertNotIn("debug=True", src)` — substring match. That misses cosmetic variants like `debug = True` (with spaces), multi-line `debug=\n True`, or any other form that produces the same runtime semantics. CodeRabbit correctly flagged it as evadable. Replaced with an `ast.walk(tree)` over the parsed app.py: find any `ast.Call` whose keywords contain `debug=True` as a literal Constant. Catches every cosmetic variant by definition. Failure message includes the offending line number(s) and the rationale (issue #9), so a future CI break is immediately debuggable. Verified by injecting `debug = True` (with spaces — the form the old check missed) into app.py: - Old check: would have passed (false negative). - New check: failed with `[136]` and the issue-#9 message. Then reverted the inject; test passes again. 42/42 tests still pass on the actual app.py. 
--- tests/test_cli_args.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/tests/test_cli_args.py b/tests/test_cli_args.py index cfe0041..3cf5497 100644 --- a/tests/test_cli_args.py +++ b/tests/test_cli_args.py @@ -9,6 +9,7 @@ python -m unittest tests.test_cli_args -v """ +import ast import sys import os import unittest @@ -294,14 +295,36 @@ def test_app_parser_debug_explicit(self): opts = _build_app_parser().parse_args(["--debug"]) self.assertTrue(opts.debug) - # -- source-level guard: app.py must NOT carry debug=True ---------------- - # If a future edit re-introduces the literal it'll be caught here. + # -- source-level guard: app.py must NOT carry a literal debug=True ------- + # AST-walk so cosmetic variations (`debug = True`, multi-line formatting, + # leading whitespace, etc.) cannot bypass the guard. A regression that + # reintroduces the literal in any form fails this test with the offending + # line number(s). def test_app_py_does_not_hardcode_debug_true(self): app_path = os.path.join(REPO_ROOT, "app.py") with open(app_path, "r", encoding="utf-8") as f: - src = f.read() - self.assertNotIn("debug=True", src) + tree = ast.parse(f.read(), filename=app_path) + + offenders = [] + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + for kw in node.keywords: + if ( + kw.arg == "debug" + and isinstance(kw.value, ast.Constant) + and kw.value.value is True + ): + offenders.append(kw.lineno) + + self.assertEqual( + offenders, [], + "Found a literal `debug=True` keyword argument in app.py at " + "line(s) %s. The Werkzeug debugger must be opt-in via the " + "--debug flag or FLASK_DEBUG env var (see issue #9), never " + "hard-coded." 
 % offenders, + ) if __name__ == "__main__": From 0388d7ad3cd2a31766213f80162fef6a45856a3f Mon Sep 17 00:00:00 2001 From: timon0305 Date: Fri, 8 May 2026 21:58:44 +0200 Subject: [PATCH 3/3] =?UTF-8?q?review:=20address=20PR=20#20=20nits=20?= =?UTF-8?q?=E2=80=94=20broaden=20debug=3DTrue=20guard=20+=20FLASK=5FDEBUG?= =?UTF-8?q?=20note?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - AST guard now handles ast.NameConstant (Py3.7) and **{"debug":True} dict-spread bypass; helper extracted for unit testing. - README: opt-in note for the Werkzeug debugger, including that FLASK_ENV=development is NOT consulted (only FLASK_DEBUG=1). - Replace em dashes in app.py comments with ASCII to silence GitHub's non-ASCII banner on review. --- README.md | 2 + app.py | 4 +- tests/test_cli_args.py | 95 ++++++++++++++++++++++++++++++++++++------ 3 files changed, 87 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 53ed2b1..929d16c 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,8 @@ python app.py Open in your browser. +The Werkzeug debugger is **off by default** and must be enabled explicitly with the `--debug` flag or by setting `FLASK_DEBUG=1`. (Note: `FLASK_ENV=development` is **not** consulted - only `FLASK_DEBUG` is. See issue #9 for the rationale.) + ## CLI Export Export chat history to Markdown without starting the web server. Running with no arguments exports **everything** (all chats + composer logs) as a zip archive into the current directory. diff --git a/app.py b/app.py index 338f83e..f04feae 100644 --- a/app.py +++ b/app.py @@ -1,5 +1,5 @@ """ -Cursor Chat Browser — Python Edition +Cursor Chat Browser - Python Edition A Flask web application for browsing and managing chat histories from the Cursor editor's AI chat feature. """ @@ -124,7 +124,7 @@ def favicon(): if debug_enabled: # Print the warning to stderr so it's visible even when stdout is # piped/redirected. 
The Werkzeug debugger is a remote-code-execution - # primitive — anyone reaching the host:port can hijack the process. + # primitive - anyone reaching the host:port can hijack the process. print( "WARNING: Flask debug mode ENABLED. The Werkzeug debugger allows " "arbitrary code execution by anyone who can reach this server. " diff --git a/tests/test_cli_args.py b/tests/test_cli_args.py index 3cf5497..d873d22 100644 --- a/tests/test_cli_args.py +++ b/tests/test_cli_args.py @@ -306,18 +306,7 @@ def test_app_py_does_not_hardcode_debug_true(self): with open(app_path, "r", encoding="utf-8") as f: tree = ast.parse(f.read(), filename=app_path) - offenders = [] - for node in ast.walk(tree): - if not isinstance(node, ast.Call): - continue - for kw in node.keywords: - if ( - kw.arg == "debug" - and isinstance(kw.value, ast.Constant) - and kw.value.value is True - ): - offenders.append(kw.lineno) - + offenders = _find_debug_true_offenders(tree) self.assertEqual( offenders, [], "Found a literal `debug=True` keyword argument in app.py at " @@ -327,5 +316,87 @@ def test_app_py_does_not_hardcode_debug_true(self): ) +class FindDebugTrueOffendersTests(unittest.TestCase): + """Unit tests for the AST-walk helper itself, so the regression guard + above keeps catching what we expect across Python AST shape changes. + + Covers: + - direct keyword `f(debug=True)` (ast.Constant on 3.8+, ast.NameConstant on 3.7) + - dict-spread `f(**{"debug": True})` bypass + - benign shapes that should NOT trip the guard (False, variable, attribute) + """ + + def _find(self, src): + return _find_debug_true_offenders(ast.parse(src)) + + def test_simple_keyword_literal(self): + self.assertEqual(self._find("app.run(debug=True)"), [1]) + + def test_keyword_false_not_flagged(self): + self.assertEqual(self._find("app.run(debug=False)"), []) + + def test_keyword_variable_not_flagged(self): + # Out of scope per PR review - only literals are tracked. 
+ self.assertEqual(self._find("flag = True\napp.run(debug=flag)"), []) + + def test_keyword_attribute_not_flagged(self): + self.assertEqual(self._find("app.run(debug=cfg.debug_on)"), []) + + def test_dict_spread_literal(self): + # Determined-bypass shape: kwargs come in via **dict literal. + offenders = self._find("app.run(**{'debug': True})") + self.assertEqual(len(offenders), 1) + + def test_dict_spread_false_not_flagged(self): + self.assertEqual(self._find("app.run(**{'debug': False})"), []) + + def test_dict_spread_other_key_not_flagged(self): + self.assertEqual(self._find("app.run(**{'foo': True})"), []) + + +# --------------------------------------------------------------------------- +# AST helper (module-level so it's testable in isolation) +# --------------------------------------------------------------------------- + +def _find_debug_true_offenders(tree): + """Return line numbers of any literal `debug=True` (or `**{"debug": True}`) + on a Call node in the AST. + + Cross-version safe: works with both ast.Constant (3.8+) and the legacy + ast.NameConstant shape (3.7) by reading `.value` attribute-style rather + than narrowing to a specific node class. Only literal True is flagged; + `debug=variable` and `debug=mod.attr` are out of scope. + """ + offenders = [] + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + for kw in node.keywords: + # Shape 1: direct keyword - f(debug=True) + if kw.arg == "debug" and _is_literal_true(kw.value): + offenders.append(kw.lineno) + continue + # Shape 2: dict-spread - f(**{"debug": True}) + if kw.arg is None and isinstance(kw.value, ast.Dict): + for k, v in zip(kw.value.keys, kw.value.values): + if _is_str_literal(k, "debug") and _is_literal_true(v): + offenders.append(getattr(v, "lineno", kw.lineno)) + return offenders + + +def _is_literal_true(node): + """True only when *node* is the literal True (ast.Constant on 3.8+, + ast.NameConstant on 3.7). 
Excludes variables/attributes via the strict + `is True` identity check on `.value`.""" + return getattr(node, "value", None) is True + + +def _is_str_literal(node, expected): + """True when *node* is a string literal equal to *expected* (handles + ast.Constant on 3.8+ and ast.Str on 3.7).""" + val = getattr(node, "value", getattr(node, "s", None)) + return isinstance(val, str) and val == expected + + if __name__ == "__main__": unittest.main()