Skip to content

Commit 26bda4c

Browse files
fix(deploy): address MilagrosMarin review on #1466
- Replace `assert` statements with explicit `if … raise DataJointError(…)` in the dispatch ladder. Project convention is to avoid assertions for runtime guarantees because Python's -O strips them; deploy-hook environments may run under -O and would otherwise see AttributeError on None instead of the intended DataJointError. The unactivated-schema case (database is None) is the most concerning since it would have emitted a malformed ALTER TABLE. - Add four unit tests covering previously-untested branches: - `test_set_replica_identity_table_instance_target` — Table-instance dispatch (deploy.py: isinstance(target, Table) branch). - `test_set_replica_identity_table_class_target` — Table-class dispatch (deploy.py: issubclass(target, Table) branch). - `test_set_replica_identity_case_insensitive_mode` — mode="FULL" accepted, matching adapter's case-tolerant handling. - `test_set_replica_identity_unactivated_schema_raises` — Schema with database=None raises rather than emitting malformed DDL. - Accept uppercase mode strings (`mode.lower()` normalization) for parity with the PG adapter's case-tolerant DDL emission. - Tighten type hints: `target: TargetType` (Union of Schema/Table-class/ Table-instance via TYPE_CHECKING forward references), `mode: Literal[ "default", "full"]`. Runtime validation unchanged. - Docstring additions: - Cost section now flags the brief AccessExclusiveLock the ALTER takes. - New "Partial-failure semantics" paragraph explaining that exceptions on table N of M leave first N-1 tables modified but propagate without returning the partial summary; idempotency makes re-running safe. - Style: split the adjacent-string concatenation on the "PostgreSQL-only" error into a single literal. All 20 unit tests pass (was 16 + 4 new).
1 parent 13fd087 commit 26bda4c

2 files changed

Lines changed: 105 additions & 14 deletions

File tree

src/datajoint/deploy.py

Lines changed: 42 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,22 @@
2020

2121
from __future__ import annotations
2222

23-
from typing import Any
23+
from typing import TYPE_CHECKING, Any, Literal, Union
2424

2525
from .errors import DataJointError
2626

27+
if TYPE_CHECKING:
28+
from .schemas import _Schema
29+
from .table import Table
30+
31+
TargetType = Union["_Schema", type["Table"], "Table"]
2732

28-
def set_replica_identity(target: Any, mode: str = "full", dry_run: bool = True) -> dict:
33+
34+
def set_replica_identity(
35+
target: "TargetType",
36+
mode: Literal["default", "full"] = "full",
37+
dry_run: bool = True,
38+
) -> dict:
2939
"""
3040
Apply ``ALTER TABLE ... REPLICA IDENTITY <mode>`` to a schema or table on PostgreSQL.
3141
@@ -52,12 +62,24 @@ def set_replica_identity(target: Any, mode: str = "full", dry_run: bool = True)
5262
5363
Cost
5464
----
55-
The ALTER itself is metadata-only and instant. The cost is in WAL volume
56-
after the change: UPDATE/DELETE on tables with FULL log the entire old row,
57-
which can be sizable on tables with TOASTed bytea columns. For DataJoint's
58-
typical insert-append workload, this cost is negligible. The notable
59-
scenario is bulk ``delete()`` on tables with ``<blob>`` columns — a
60-
transient WAL burst proportional to the deleted-row payload size.
65+
The ALTER itself is metadata-only and instant, but requires a brief
66+
``AccessExclusiveLock`` on each table — it will block behind in-flight
67+
writes/reads on a busy table. Run during a quiet window on actively-
68+
ingested tables.
69+
70+
The ongoing cost is in WAL volume after the change: UPDATE/DELETE on
71+
tables with FULL log the entire old row, which can be sizable on tables
72+
with TOASTed bytea columns. For DataJoint's typical insert-append
73+
workload, this cost is negligible. The notable scenario is bulk
74+
``delete()`` on tables with ``<blob>`` columns — a transient WAL burst
75+
proportional to the deleted-row payload size.
76+
77+
Partial-failure semantics
78+
-------------------------
79+
If ``connection.query(ddl)`` raises on table N of M, the first N-1
80+
tables are already modified at the storage layer but the exception
81+
propagates without returning the partial summary. The operation is
82+
idempotent, so re-running brings the remaining tables into compliance.
6183
6284
Compliance considerations
6385
-------------------------
@@ -112,35 +134,41 @@ def set_replica_identity(target: Any, mode: str = "full", dry_run: bool = True)
112134
Databricks: `Lakehouse Sync
113135
<https://docs.databricks.com/aws/en/oltp/projects/lakehouse-sync>`_.
114136
"""
115-
if mode not in ("default", "full"):
137+
mode_normalized = mode.lower() if isinstance(mode, str) else mode
138+
if mode_normalized not in ("default", "full"):
116139
raise DataJointError(f"mode must be 'default' or 'full'; got {mode!r}")
140+
mode = mode_normalized # type: ignore[assignment]
117141

118142
from .schemas import _Schema
119143
from .table import Table
120144

121145
if isinstance(target, _Schema):
122146
connection = target.connection
123-
assert connection is not None, "Schema has no active connection"
147+
if connection is None:
148+
raise DataJointError("Schema has no active connection.")
124149
adapter = connection.adapter
125-
assert target.database is not None, "Schema is not activated"
150+
if target.database is None:
151+
raise DataJointError("Schema is not activated. Call schema.activate(...) before set_replica_identity().")
126152
tables = [adapter.make_full_table_name(target.database, t) for t in target.list_tables()]
127153
elif isinstance(target, type) and issubclass(target, Table):
128154
instance = target()
129155
connection = instance.connection
130-
assert connection is not None, "Table has no active connection"
156+
if connection is None:
157+
raise DataJointError(f"Table {target.__name__} has no active connection.")
131158
adapter = connection.adapter
132159
tables = [instance.full_table_name]
133160
elif isinstance(target, Table):
134161
connection = target.connection
135-
assert connection is not None, "Table has no active connection"
162+
if connection is None:
163+
raise DataJointError(f"Table {type(target).__name__} has no active connection.")
136164
adapter = connection.adapter
137165
tables = [target.full_table_name]
138166
else:
139167
raise DataJointError(f"target must be a Schema or Table class/instance; got {type(target).__name__}")
140168

141169
if not hasattr(adapter, "replica_identity_ddl"):
142170
raise DataJointError(
143-
f"set_replica_identity is PostgreSQL-only; the {adapter.backend} adapter " "does not support REPLICA IDENTITY."
171+
f"set_replica_identity is PostgreSQL-only; the {adapter.backend} adapter does not support REPLICA IDENTITY."
144172
)
145173

146174
result: dict[str, Any] = {

tests/unit/test_deploy.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,66 @@ def test_set_replica_identity_empty_schema():
107107
schema = _FakeSchema("ms", [], _FakeAdapter())
108108
result = set_replica_identity(schema, mode="full", dry_run=False)
109109
assert result == {"tables_analyzed": 0, "tables_modified": 0, "ddl": []}
110+
111+
112+
class _FakeTable(dj.Table):
113+
"""Table stub bypassing schema-decoration wiring."""
114+
115+
# Suppress dj.Table's class-construction checks
116+
table_name = "fake_table"
117+
118+
def __init__(self, full_table_name: str, adapter: object) -> None:
119+
# Skip dj.Table.__init__ — fabricate the minimal attributes.
120+
self._full_table_name = full_table_name
121+
self._connection = _FakeConnection(adapter)
122+
123+
@property
124+
def full_table_name(self) -> str:
125+
return self._full_table_name
126+
127+
@property
128+
def connection(self):
129+
return self._connection
130+
131+
132+
def test_set_replica_identity_table_instance_target():
133+
"""Table instance dispatch (deploy.py: isinstance(target, Table) branch)."""
134+
table = _FakeTable('"ms"."the_table"', _FakeAdapter())
135+
result = set_replica_identity(table, mode="full", dry_run=False)
136+
assert result == {
137+
"tables_analyzed": 1,
138+
"tables_modified": 1,
139+
"ddl": ['ALTER TABLE "ms"."the_table" REPLICA IDENTITY FULL'],
140+
}
141+
assert table.connection.queries == result["ddl"]
142+
143+
144+
def test_set_replica_identity_table_class_target(monkeypatch):
145+
"""Table-class dispatch (deploy.py: issubclass(target, Table) branch)."""
146+
# Build a class that instantiates a _FakeTable when called like target()
147+
fake_adapter = _FakeAdapter()
148+
149+
class _TableClass(dj.Table):
150+
def __new__(cls):
151+
return _FakeTable('"ms"."class_table"', fake_adapter)
152+
153+
# `isinstance(_TableClass, type) and issubclass(_TableClass, dj.Table)` is True.
154+
result = set_replica_identity(_TableClass, mode="full", dry_run=True)
155+
assert result["tables_analyzed"] == 1
156+
assert result["tables_modified"] == 0 # dry_run
157+
assert result["ddl"] == ['ALTER TABLE "ms"."class_table" REPLICA IDENTITY FULL']
158+
159+
160+
def test_set_replica_identity_case_insensitive_mode():
161+
"""`mode='FULL'` (uppercase) should be accepted, matching adapter case-handling."""
162+
schema = _FakeSchema("ms", ["t1"], _FakeAdapter())
163+
result = set_replica_identity(schema, mode="FULL", dry_run=True)
164+
assert result["ddl"] == ['ALTER TABLE "ms"."t1" REPLICA IDENTITY FULL']
165+
166+
167+
def test_set_replica_identity_unactivated_schema_raises():
168+
"""Schema with database=None (never activated) must raise, not produce malformed DDL."""
169+
schema = _FakeSchema("ms", ["t1"], _FakeAdapter())
170+
schema.database = None
171+
with pytest.raises(DataJointError, match="Schema is not activated"):
172+
set_replica_identity(schema, mode="full", dry_run=True)

0 commit comments

Comments
 (0)