Commit 9720e26

Merge remote-tracking branch 'origin/maint/2.0'
2 parents: fda042b + bf6234e

4 files changed: +118 −6 lines

src/datajoint/expression.py

Lines changed: 4 additions & 1 deletion

@@ -1019,7 +1019,10 @@ def to_arrays(self, *attrs, include_key=False, order_by=None, limit=None, offset
                 arr = np.array(values)
             except ValueError:
                 # Variable-size data (e.g., arrays of different shapes)
-                arr = np.array(values, dtype=object)
+                # Must assign individually to avoid numpy broadcasting issues
+                arr = np.empty(len(values), dtype=object)
+                for i, v in enumerate(values):
+                    arr[i] = v
             result_arrays.append(arr)
 
         if include_key:
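
Context note (not part of the diff): the change matters because the old fallback, np.array(values, dtype=object), can itself raise a ValueError when the arrays' leading dimensions line up, since numpy first guesses a rectangular shape and then tries to fill it. A minimal standalone sketch of that behavior and of the element-wise workaround used in the patch, with made-up shapes matching the tests below:

import numpy as np

# Ragged list: same first dimension, different trailing dimensions.
values = [np.random.randn(100), np.random.randn(100, 1), np.random.randn(100, 2)]

# np.array(values, dtype=object) may fail here with something like
# "ValueError: could not broadcast input array from shape (100,1) into shape (100,)".

# The patch instead pre-allocates an object array and assigns element by element,
# which never triggers numpy's shape inference:
arr = np.empty(len(values), dtype=object)
for i, v in enumerate(values):
    arr[i] = v

assert arr.dtype == object
assert arr[1].shape == (100, 1)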

src/datajoint/jobs.py

Lines changed: 5 additions & 5 deletions

@@ -13,7 +13,7 @@
 import platform
 import subprocess
 
-from .condition import AndList
+from .condition import AndList, Not
 from .errors import DataJointError, DuplicateError
 from .heading import Heading
 from .table import Table
@@ -370,8 +370,6 @@ def refresh(
 
         # Keys that need jobs: in key_source, not in target, not in jobs
         # Disable semantic_check for Job table (self) because its attributes may not have matching lineage
-        from .condition import Not
-
         new_keys = (key_source - self._target).restrict(Not(self), semantic_check=False).proj()
         new_key_list = new_keys.keys()
 
@@ -396,8 +394,10 @@ def refresh(
         # 2. Re-pend success jobs if keep_completed=True
         if config.jobs.keep_completed:
             # Success jobs whose keys are in key_source but not in target
-            # Disable semantic_check for Job table operations
-            success_to_repend = self.completed.restrict(key_source, semantic_check=False) - self._target
+            # Disable semantic_check for Job table operations (job table PK has different lineage than target)
+            success_to_repend = self.completed.restrict(key_source, semantic_check=False).restrict(
+                Not(self._target), semantic_check=False
+            )
             repend_keys = success_to_repend.keys()
             for key in repend_keys:
                 (self & key).delete_quick()
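
Context note (not part of the diff): between query expressions, `-` effectively restricts the left operand by Not(...) of the right operand with the default semantic (lineage) check, which is why it raised DataJointError here: the jobs table's primary key does not share lineage with the target (see the test docstring below). Writing the antijoin as an explicit restrict() lets both steps opt out of the check. A rough sketch of the rewrite, where completed, key_source, and target stand in for self.completed, key_source, and self._target inside refresh(); an illustration, not a runnable script:

from datajoint.condition import Not  # the same Not imported at the top of jobs.py above

# Before: the `-` antijoin applied the default semantic check and raised
# DataJointError about mismatched lineage when keep_completed=True:
#     success_to_repend = completed.restrict(key_source, semantic_check=False) - target

# After: the same antijoin written as an explicit restriction by Not(...),
# so the lineage check can be skipped in both steps (mirrors the diff above):
success_to_repend = completed.restrict(key_source, semantic_check=False).restrict(
    Not(target), semantic_check=False
)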

tests/integration/test_fetch.py

Lines changed: 60 additions & 0 deletions

@@ -397,3 +397,63 @@ def test_to_arrays_without_include_key(lang):
     names, langs = result
     assert isinstance(names, np.ndarray)
     assert isinstance(langs, np.ndarray)
+
+
+def test_to_arrays_inhomogeneous_shapes(schema_any):
+    """Test to_arrays handles arrays of different shapes correctly.
+
+    Regression test for https://github.com/datajoint/datajoint-python/issues/1380
+    """
+    table = schema.Longblob()
+    table.delete()
+
+    # Insert arrays with different shapes that numpy would try to broadcast
+    table.insert(
+        [
+            {"id": 0, "data": np.random.randn(100)},  # shape (100,)
+            {"id": 1, "data": np.random.randn(100, 1)},  # shape (100, 1)
+            {"id": 2, "data": np.random.randn(100, 2)},  # shape (100, 2)
+        ]
+    )
+
+    # This should not raise ValueError
+    data = table.to_arrays("data", order_by="id")
+
+    # Should return object array with 3 elements
+    assert data.dtype == object
+    assert len(data) == 3
+
+    # Each element should preserve its original shape
+    assert data[0].shape == (100,)
+    assert data[1].shape == (100, 1)
+    assert data[2].shape == (100, 2)
+
+
+def test_to_arrays_inhomogeneous_shapes_second_axis(schema_any):
+    """Test to_arrays handles arrays differing on second axis.
+
+    Regression test for https://github.com/datajoint/datajoint-python/issues/1380
+    """
+    table = schema.Longblob()
+    table.delete()
+
+    # Insert arrays with different shapes on second axis
+    table.insert(
+        [
+            {"id": 0, "data": np.random.randn(100)},  # shape (100,)
+            {"id": 1, "data": np.random.randn(1, 100)},  # shape (1, 100)
+            {"id": 2, "data": np.random.randn(2, 100)},  # shape (2, 100)
+        ]
+    )
+
+    # This should not raise ValueError
+    data = table.to_arrays("data", order_by="id")
+
+    # Should return object array with 3 elements
+    assert data.dtype == object
+    assert len(data) == 3
+
+    # Each element should preserve its original shape
+    assert data[0].shape == (100,)
+    assert data[1].shape == (1, 100)
+    assert data[2].shape == (2, 100)
tests/integration/test_jobs.py

Lines changed: 49 additions & 0 deletions

@@ -158,3 +158,52 @@ def test_long_error_stack(clean_jobs, subject, experiment):
     experiment.jobs.error(key, "error message", long_error_stack)
     error_stack = experiment.jobs.errors.fetch1("error_stack")
     assert error_stack == long_error_stack, "error stacks do not agree"
+
+
+def test_populate_reserve_jobs_with_keep_completed(clean_jobs, subject, experiment):
+    """Test populate(reserve_jobs=True) with keep_completed=True.
+
+    Regression test for https://github.com/datajoint/datajoint-python/issues/1379
+    The bug was that the `-` operator in jobs.refresh() didn't pass semantic_check=False,
+    causing a DataJointError about different lineages when keep_completed=True.
+    """
+    # Clear experiment data to ensure there's work to do
+    experiment.delete()
+
+    with dj.config.override(jobs={"keep_completed": True, "add_job_metadata": True}):
+        # Should not raise DataJointError about semantic matching
+        experiment.populate(reserve_jobs=True)
+
+        # Verify jobs completed successfully
+        assert len(experiment) > 0, "No data was populated"
+        assert len(experiment.jobs.errors) == 0, "Unexpected errors during populate"
+
+        # With keep_completed=True, completed jobs should be retained
+        assert len(experiment.jobs.completed) > 0, "Completed jobs not retained"
+
+
+def test_jobs_refresh_with_keep_completed(clean_jobs, subject, experiment):
+    """Test that jobs.refresh() works with keep_completed=True.
+
+    Regression test for https://github.com/datajoint/datajoint-python/issues/1379
+    """
+    # Clear experiment data and jobs
+    experiment.delete()
+    experiment.jobs.delete()
+
+    with dj.config.override(jobs={"keep_completed": True, "add_job_metadata": True}):
+        # Refresh should create pending jobs without semantic matching error
+        experiment.jobs.refresh()
+        pending_before = len(experiment.jobs.pending)
+        assert pending_before > 0, "No pending jobs created"
+
+        # Manually reserve and complete a job
+        key = experiment.jobs.pending.keys(limit=1)[0]
+        experiment.jobs.reserve(key)
+        experiment.jobs.complete(key)
+
+        # Job should now be completed
+        assert len(experiment.jobs.completed) == 1, "Job not marked as completed"
+
+        # Calling refresh again should not raise semantic matching error
+        experiment.jobs.refresh()  # This was failing before the fix
