From 26258c03c57b2fd129df42db1841e66f43af37d5 Mon Sep 17 00:00:00 2001 From: timon0305 Date: Mon, 4 May 2026 23:24:38 +0200 Subject: [PATCH 1/2] fix: gate Flask debug / Werkzeug debugger behind opt-in flag (closes #9) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Werkzeug debugger is a documented remote-code-execution primitive. app.py was hard-coding `debug=True`, which exposed RCE to anyone who could reach the listening port — a misconfigured `--host`, an SSH tunnel, or a careless reverse proxy was enough. - Remove the `debug=True` literal from app.py. - Default debug OFF. Opt-in via either `--debug` CLI flag or `FLASK_DEBUG=1` env var (truthy = "1" / "true" / "yes", case-insensitive, whitespace-tolerant). - Print a stderr WARNING when debug is enabled, naming the RCE risk and reminding the operator to bind only to loopback. - Gate the auto-reloader on the same flag. Live-tested all four matrix cells: (default off / --debug / FLASK_DEBUG=1 / FLASK_DEBUG=0). Bogus paths under debug-off return a plain Flask 404, not the Werkzeug debugger console. Helper `resolve_debug_flag(env_value, cli_flag)` lives in `utils/debug_flag.py` so it can be unit-tested without importing Flask (matching the existing test convention in tests/test_cli_args.py). Regression coverage in tests/test_cli_args.py adds 8 cases: - default-off, env-truthy, env-falsey, CLI override - argparse `--debug` default + explicit - source-level guard that fails if `debug=True` is reintroduced --- app.py | 27 ++++++++++++++++++-- tests/test_cli_args.py | 58 ++++++++++++++++++++++++++++++++++++++++++ utils/debug_flag.py | 21 +++++++++++++++ 3 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 utils/debug_flag.py diff --git a/app.py b/app.py index 5b43f9d..338f83e 100644 --- a/app.py +++ b/app.py @@ -4,12 +4,15 @@ from the Cursor editor's AI chat feature. 
""" +import os import sys from datetime import datetime from pathlib import Path from flask import Flask, render_template, send_from_directory +from utils.debug_flag import resolve_debug_flag + from api.workspaces import bp as workspaces_bp from api.composers import bp as composers_bp from api.logs import bp as logs_bp @@ -101,6 +104,13 @@ def favicon(): help="Path to exclusion rules file (sensitive projects/chats are omitted). " "If omitted, uses ~/.cursor-chat-browser/exclusion-rules.txt if present.", ) + parser.add_argument( + "--debug", + action="store_true", + help="Enable Flask debug mode and the Werkzeug debugger. " + "DANGEROUS: allows remote code execution if the port is exposed. " + "Off by default; can also be enabled via FLASK_DEBUG=1.", + ) args = parser.parse_args() if args.base_dir: @@ -109,10 +119,23 @@ def favicon(): app = create_app(exclusion_rules_path=args.exclude_rules) print(f"Cursor Chat Browser (Python) running at http://{args.host}:{args.port}") + + debug_enabled = resolve_debug_flag(os.environ.get("FLASK_DEBUG"), args.debug) + if debug_enabled: + # Print the warning to stderr so it's visible even when stdout is + # piped/redirected. The Werkzeug debugger is a remote-code-execution + # primitive — anyone reaching the host:port can hijack the process. + print( + "WARNING: Flask debug mode ENABLED. The Werkzeug debugger allows " + "arbitrary code execution by anyone who can reach this server. " + "Bind only to 127.0.0.1 and never expose to untrusted networks.", + file=sys.stderr, + ) + # Disable reloader on Windows to avoid a socket conflict with Flask's stat reloader. 
app.run( host=args.host, port=args.port, - debug=True, - use_reloader=(sys.platform != "win32"), + debug=debug_enabled, + use_reloader=debug_enabled and (sys.platform != "win32"), ) diff --git a/tests/test_cli_args.py b/tests/test_cli_args.py index 02438ba..cfe0041 100644 --- a/tests/test_cli_args.py +++ b/tests/test_cli_args.py @@ -43,6 +43,7 @@ def _build_app_parser(): parser.add_argument("--base-dir", default=None) parser.add_argument("--exclude-rules", "-e", default=None, metavar="PATH", dest="exclude_rules") + parser.add_argument("--debug", action="store_true") return parser @@ -246,5 +247,62 @@ def test_export_py_has_since_choices(self): self.assertIn('choices=["all", "last"]', src) +# --------------------------------------------------------------------------- +# Werkzeug debugger gating (security): debug must be off by default, +# opt-in via --debug or FLASK_DEBUG=1. Regression for the Critical +# `debug=True` exposure that was hard-coded in app.py. +# --------------------------------------------------------------------------- + +class TestDebugFlagGating(unittest.TestCase): + + # -- resolve_debug_flag helper ------------------------------------------- + + def setUp(self): + # Import from the standalone utility module so the test does not pull + # Flask into scope (the rest of this file deliberately avoids Flask). 
+ from utils.debug_flag import resolve_debug_flag + self._resolve = resolve_debug_flag + + def test_debug_off_when_env_unset_and_no_cli(self): + self.assertFalse(self._resolve(None, False)) + + def test_debug_off_when_env_empty_string(self): + self.assertFalse(self._resolve("", False)) + + def test_debug_off_for_explicit_falsey_env_values(self): + for v in ("0", "false", "False", "no", "off", "anything-not-truthy"): + with self.subTest(env=v): + self.assertFalse(self._resolve(v, False)) + + def test_debug_on_for_truthy_env_values(self): + for v in ("1", "true", "True", "TRUE", "yes", "YES", " 1 "): + with self.subTest(env=v): + self.assertTrue(self._resolve(v, False)) + + def test_cli_flag_overrides_env(self): + # Even with FLASK_DEBUG explicitly off, --debug should turn it on. + self.assertTrue(self._resolve("0", True)) + self.assertTrue(self._resolve(None, True)) + + # -- argparse: --debug flag ---------------------------------------------- + + def test_app_parser_debug_default_false(self): + opts = _build_app_parser().parse_args([]) + self.assertFalse(opts.debug) + + def test_app_parser_debug_explicit(self): + opts = _build_app_parser().parse_args(["--debug"]) + self.assertTrue(opts.debug) + + # -- source-level guard: app.py must NOT carry debug=True ---------------- + # If a future edit re-introduces the literal it'll be caught here. + + def test_app_py_does_not_hardcode_debug_true(self): + app_path = os.path.join(REPO_ROOT, "app.py") + with open(app_path, "r", encoding="utf-8") as f: + src = f.read() + self.assertNotIn("debug=True", src) + + if __name__ == "__main__": unittest.main() diff --git a/utils/debug_flag.py b/utils/debug_flag.py new file mode 100644 index 0000000..ba607f1 --- /dev/null +++ b/utils/debug_flag.py @@ -0,0 +1,21 @@ +"""Resolution of the Flask debug / Werkzeug debugger flag. + +Lives in `utils/` so it can be unit-tested without importing Flask +(which the test suite intentionally avoids — see tests/test_cli_args.py). 
+""" + + +def resolve_debug_flag(env_value, cli_flag): + """Return True iff Flask debug / Werkzeug debugger should be enabled. + + Off by default. The Werkzeug debugger lets a remote attacker execute + arbitrary Python in the server process, so debug mode must be opt-in + and never the default. Enabled only when: + - the operator explicitly passes --debug on the command line, or + - FLASK_DEBUG is set to a truthy value ("1", "true", "yes"). + """ + if cli_flag: + return True + if env_value is None: + return False + return env_value.strip().lower() in ("1", "true", "yes") From c2b4a21d022c4ebc4feb9516f1a916170fda4284 Mon Sep 17 00:00:00 2001 From: timon0305 Date: Tue, 5 May 2026 14:20:40 +0200 Subject: [PATCH 2/2] test: AST-walk the debug=True regression guard (CodeRabbit on PR #10) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Old guard: `self.assertNotIn("debug=True", src)` — substring match. That misses cosmetic variants like `debug = True` (with spaces), multi-line `debug=\n True`, or any other form that produces the same runtime semantics. CodeRabbit correctly flagged it as evadable. Replaced with an `ast.walk(tree)` over the parsed app.py: find any `ast.Call` whose keywords contain `debug=True` as a literal Constant. Catches every cosmetic variant by definition. Failure message includes the offending line number(s) and the rationale (issue #9), so a future CI break is immediately debuggable. Verified by injecting `debug = True` (with spaces — the form the old check missed) into app.py: - Old check: would have passed (false negative). - New check: failed with `[136]` and the issue-#9 message. Then reverted the inject; test passes again. 42/42 tests still pass on the actual app.py. 
--- tests/test_cli_args.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/tests/test_cli_args.py b/tests/test_cli_args.py index cfe0041..3cf5497 100644 --- a/tests/test_cli_args.py +++ b/tests/test_cli_args.py @@ -9,6 +9,7 @@ python -m unittest tests.test_cli_args -v """ +import ast import sys import os import unittest @@ -294,14 +295,36 @@ def test_app_parser_debug_explicit(self): opts = _build_app_parser().parse_args(["--debug"]) self.assertTrue(opts.debug) - # -- source-level guard: app.py must NOT carry debug=True ---------------- - # If a future edit re-introduces the literal it'll be caught here. + # -- source-level guard: app.py must NOT carry a literal debug=True ------- + # AST-walk so cosmetic variations (`debug = True`, multi-line formatting, + # leading whitespace, etc.) cannot bypass the guard. A regression that + # reintroduces the literal in any form fails this test with the offending + # line number(s). def test_app_py_does_not_hardcode_debug_true(self): app_path = os.path.join(REPO_ROOT, "app.py") with open(app_path, "r", encoding="utf-8") as f: - src = f.read() - self.assertNotIn("debug=True", src) + tree = ast.parse(f.read(), filename=app_path) + + offenders = [] + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + for kw in node.keywords: + if ( + kw.arg == "debug" + and isinstance(kw.value, ast.Constant) + and kw.value.value is True + ): + offenders.append(kw.lineno) + + self.assertEqual( + offenders, [], + "Found a literal `debug=True` keyword argument in app.py at " + "line(s) %s. The Werkzeug debugger must be opt-in via the " + "--debug flag or FLASK_DEBUG env var (see issue #9), never " + "hard-coded." % offenders, + ) if __name__ == "__main__":