From d2ff5dabd6bb8730b6bd7329cfd96ef88bdfc59e Mon Sep 17 00:00:00 2001 From: Gregory Way Date: Mon, 22 Jun 2026 13:52:31 -0600 Subject: [PATCH 01/11] fix: correct texture array-reset bug and add regression test Move numpy.empty array allocation outside the per-object loop in compute_texture. It was being reset on every iteration, discarding all previous objects' Haralick values and leaving only the last object intact. All prior objects received uninitialized memory from numpy.empty. Adds test_texture_values_correct_per_object: independently computes expected Haralick values per object with mahotas and asserts the pipeline output matches, catching any regression where the array is reset inside the loop. Co-Authored-By: Claude Sonnet 4.6 --- src/zedprofiler/featurization/texture.py | 2 +- tests/featurization/test_texture.py | 100 ++++++++++++++++++++++- 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/src/zedprofiler/featurization/texture.py b/src/zedprofiler/featurization/texture.py index 32a9b4d..780d333 100644 --- a/src/zedprofiler/featurization/texture.py +++ b/src/zedprofiler/featurization/texture.py @@ -151,6 +151,7 @@ def compute_texture( # noqa: C901 ) # loop through each label and get the bounding box # to compute features for the object + features = numpy.empty((n_directions, 13, max(labels))) for _, label in enumerate(labels): if int(label) == 0: continue @@ -167,7 +168,6 @@ def compute_texture( # noqa: C901 if not numpy.any(object_mask): continue image_object[~object_mask] = 0 - features = numpy.empty((n_directions, 13, max(labels))) image_object = scale_image(image_object, num_gray_levels=grayscale) try: # calculates 13 Haralick features for each direction (13) diff --git a/tests/featurization/test_texture.py b/tests/featurization/test_texture.py index aaee06a..7de2f3c 100644 --- a/tests/featurization/test_texture.py +++ b/tests/featurization/test_texture.py @@ -3,12 +3,16 @@ import numpy as np import pandas as pd import pytest +import skimage.measure from beartype import beartype from pydantic import BaseModel, ConfigDict, field_validator mahotas = pytest.importorskip("mahotas") -from zedprofiler.featurization.texture import compute_texture # noqa: E402 +from zedprofiler.featurization.texture import compute_texture, scale_image # noqa: E402 + +LABEL_OBJ_1 = 1 +LABEL_OBJ_2 = 2 class ImageSetLoaderModel(BaseModel): @@ -56,3 +60,97 @@ def test_compute_texture_basic( df = compute_texture(loader, distance=1, grayscale=256) assert isinstance(df, pd.DataFrame) assert "Metadata_Object_ObjectID" in df.columns + + +def test_texture_values_correct_per_object() -> None: + """Each object must receive its own Haralick values, not another object's. + + Regression test for a bug where features = numpy.empty(...) was called + inside the per-object loop, resetting the array on every iteration so only + the last object's values survived. All prior objects received uninitialized + memory from numpy.empty. + + Two objects with distinct pixel intensities are profiled. The test + independently computes expected Haralick values per object using mahotas + directly and asserts the pipeline output matches. It fails on the buggy code + because object 1 gets garbage values instead of its own. + """ + # Object 1: bright uniform region (intensity 200) + # Object 2: dim uniform region (intensity 50) + # AngularSecondMoment for a uniform region is 1.0; if the array is reset + # inside the loop, object 1 gets uninitialized values instead. + shape = (20, 20, 20) + image = np.zeros(shape, dtype=np.uint8) + label = np.zeros(shape, dtype=np.int32) + + image[1:5, 1:5, 1:5] = 200 + label[1:5, 1:5, 1:5] = LABEL_OBJ_1 + + image[14:18, 14:18, 14:18] = 50 + label[14:18, 14:18, 14:18] = LABEL_OBJ_2 + + imgset = ImageSetLoaderModel() + loader = ObjectLoaderModel( + image=image, + label_image=label, + object_ids=[LABEL_OBJ_1, LABEL_OBJ_2], + image_set_loader=imgset, + ) + + df = compute_texture(loader, distance=1, grayscale=256) + + # Build per-object lookup: {object_id: {feature_col: value}} + obj_textures: dict[int, dict[str, float]] = {} + for _, row in df.iterrows(): + obj_id = int(row["Metadata_Object_ObjectID"]) + obj_textures.setdefault(obj_id, {}) + for col in df.columns: + if col != "Metadata_Object_ObjectID": + obj_textures[obj_id][col] = row[col] + + assert LABEL_OBJ_1 in obj_textures, "Object 1 missing from texture output" + assert LABEL_OBJ_2 in obj_textures, "Object 2 missing from texture output" + + for obj_id in [LABEL_OBJ_1, LABEL_OBJ_2]: + mask = label == obj_id + crop_img = image.copy() + crop_img[~mask] = 0 + + props = skimage.measure.regionprops_table( + mask.astype(np.uint8), properties=["bbox"] + ) + z0, y0, x0 = ( + int(props["bbox-0"][0]), + int(props["bbox-1"][0]), + int(props["bbox-2"][0]), + ) + z1, y1, x1 = ( + int(props["bbox-3"][0]), + int(props["bbox-4"][0]), + int(props["bbox-5"][0]), + ) + crop_scaled = scale_image(crop_img[z0:z1, y0:y1, x0:x1], num_gray_levels=256) + + expected = mahotas.features.haralick( + ignore_zeros=True, + f=crop_scaled, + distance=1, + compute_14th_feature=False, + ) + expected_asm = expected[0, 0] + + asm_cols = [ + c + for c in obj_textures[obj_id] + if "AngularSecondMoment" in c and "-00-" in c + ] + assert asm_cols, ( + f"No AngularSecondMoment-direction-00 column for object {obj_id}" + ) + actual_asm = obj_textures[obj_id][asm_cols[0]] + + assert np.isclose(actual_asm, expected_asm, rtol=1e-5), ( + f"Object {obj_id} AngularSecondMoment: " + f"got {actual_asm}, expected {expected_asm}. " + f"Likely caused by numpy.empty reset inside the per-object loop." + ) From 686c067cf79c9f864a3d8e9b74d50166c5cfa285 Mon Sep 17 00:00:00 2001 From: Gregory Way Date: Mon, 22 Jun 2026 14:00:09 -0600 Subject: [PATCH 02/11] fix: use find_boundaries(mode='inner') to fix MinIntensityEdge always-zero bug get_outline used find_boundaries with the default mode='thick', which returns both inner (object-side) and outer (background-side) boundary pixels. Because the image is zeroed outside the cell before get_outline is called, outer boundary pixels always have intensity 0, so numpy.min(edge pixels) always returned 0 regardless of actual edge intensity. Switching to mode='inner' restricts the edge mask to pixels inside the object boundary, so all edge pixels have real cell intensities. Adds test_min_intensity_edge_not_zero_for_bright_cell: a cell with uniform intensity 100 must have MinIntensityEdge=100, not 0. Co-Authored-By: Claude Sonnet 4.6 --- src/zedprofiler/featurization/intensity.py | 2 +- tests/featurization/test_intensity.py | 45 ++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/zedprofiler/featurization/intensity.py b/src/zedprofiler/featurization/intensity.py index a7feac9..def6976 100644 --- a/src/zedprofiler/featurization/intensity.py +++ b/src/zedprofiler/featurization/intensity.py @@ -31,7 +31,7 @@ def get_outline(mask: numpy.ndarray) -> numpy.ndarray: """ outline = numpy.zeros_like(mask) for z in range(mask.shape[0]): - outline[z] = skimage.segmentation.find_boundaries(mask[z]) + outline[z] = skimage.segmentation.find_boundaries(mask[z], mode="inner") return outline diff --git a/tests/featurization/test_intensity.py b/tests/featurization/test_intensity.py index 2f01168..3728493 100644 --- a/tests/featurization/test_intensity.py +++ b/tests/featurization/test_intensity.py @@ -41,6 +41,51 @@ def make_label_and_image( return image, label +def test_min_intensity_edge_not_zero_for_bright_cell() -> None: + """MinIntensityEdge must reflect actual boundary intensities, not 0. + + Regression test for a bug where get_outline used find_boundaries with the + default mode='thick', which returns both inner (object-side) and outer + (background-side) boundary pixels. Because the image is zeroed outside the + cell before the outline is computed, outer boundary pixels always have + intensity 0, making numpy.min always return 0. + + The fix is to use mode='inner' so only pixels inside the object boundary + are included in the edge mask. + """ + shape = (10, 10, 10) + image = np.zeros(shape, dtype=np.float32) + label = np.zeros(shape, dtype=np.int32) + + image[3:7, 3:7, 3:7] = 100.0 + label[3:7, 3:7, 3:7] = 1 + + imgset = ImageSetLoaderModel() + loader = ObjectLoaderModel( + image=image, + label_image=label, + object_ids=[1], + image_set_loader=imgset, + ) + + df = compute_intensity(loader) + + row = df[(df["Metadata_Object_ObjectID"] == 1)] + min_edge_col = [c for c in df.columns if "MinIntensityEdge" in c] + assert min_edge_col, "MinIntensityEdge column not found in output" + + min_edge = row[min_edge_col[0]].values[0] + + assert min_edge != 0, ( + "MinIntensityEdge is 0 for a cell with uniform intensity 100. " + "Likely caused by find_boundaries(mode='thick') including outer " + "(background) boundary pixels that have been zeroed." + ) + assert min_edge == pytest.approx(100.0), ( + f"MinIntensityEdge should be 100 (cell intensity) but got {min_edge}" + ) + + @pytest.mark.parametrize("shape,center", [((6, 6, 6), (3, 3, 3))]) def test_compute_intensity_basic( shape: tuple[int, int, int], center: tuple[int, int, int] From 9385716becca68bacd71b14e2a6b9dc5f2155579 Mon Sep 17 00:00:00 2001 From: Gregory Way Date: Mon, 22 Jun 2026 20:34:51 -0600 Subject: [PATCH 03/11] fix: replicate NF1_3D featurization bug fixes in ZedProfiler Port all fixes from the NF1_3D audit to the equivalent ZedProfiler modules: - colocalization: initialize combined_thresh before try block (Bug 2), add numpy.any guard on overlap section, fix Accurate/Fast dispatch inversion (Bug 3), remove dead pre-loop pearsonr initialization (Bug 4), and fix compute_colocalization hard-coding fast_costes='Accurate' ignoring the caller's argument (Bug B) - granularity: guard per-axis scale factor against division-by-zero when a dimension has size 1, in both _upsample_3d and the background upsampling block (Bug 7) - neighbors: replace bbox-crop adjacency detection with 1-voxel morphological dilation so face-touching objects are detected correctly (Bug 5) - intensity: rename bbox variables to avoid collision with max_intensity_* names (Bug 9), add index=1 to scipy.ndimage.sum so it sums only the target object's voxels (Bug 10) Add regression tests in each test file covering all fixed bugs. Co-Authored-By: Claude Sonnet 4.6 --- .../featurization/colocalization.py | 61 +++++++------- src/zedprofiler/featurization/granularity.py | 18 ++-- src/zedprofiler/featurization/intensity.py | 35 ++++---- src/zedprofiler/featurization/neighbors.py | 21 ++--- tests/featurization/test_colocalization.py | 83 +++++++++++++++++++ tests/featurization/test_granularity.py | 44 ++++++++++ tests/featurization/test_intensity.py | 48 +++++++++++ tests/featurization/test_neighbors.py | 38 +++++++++ 8 files changed, 284 insertions(+), 64 deletions(-) diff --git a/src/zedprofiler/featurization/colocalization.py b/src/zedprofiler/featurization/colocalization.py index 8d421bf..9d27e57 100644 --- a/src/zedprofiler/featurization/colocalization.py +++ b/src/zedprofiler/featurization/colocalization.py @@ -117,8 +117,6 @@ def linear_costes_threshold_calculation( first_image_max = first_image.max() second_image_max = second_image.max() - # Initialize without a threshold - costReg, _ = scipy.stats.pearsonr(first_image, second_image) thr_first_image_c = i thr_second_image_c = (a * i) + b while i > first_image_max and (a * i) + b > second_image_max: @@ -361,6 +359,9 @@ def calculate_colocalization( # noqa: PLR0912, PLR0915 ################################################################################################ # Threshold as percentage of maximum intensity of objects in each channel + # Initialise before the try block so combined_thresh is always bound even + # when the except branch fires (numpy.max raises ValueError on empty arrays). + combined_thresh = numpy.zeros_like(cropped_image_1, dtype=bool) try: tff = (thr / 100) * numpy.max(cropped_image_1) tss = (thr / 100) * numpy.max(cropped_image_2) @@ -394,28 +395,30 @@ def calculate_colocalization( # noqa: PLR0912, PLR0915 # Calculate the overlap coefficient ################################################################################################ - fpsq = scipy.ndimage.sum( - cropped_image_1[combined_thresh] ** 2, - ) - spsq = scipy.ndimage.sum( - cropped_image_2[combined_thresh] ** 2, - ) - pdt = numpy.sqrt(numpy.array(fpsq) * numpy.array(spsq)) - overlap = ( - scipy.ndimage.sum( - cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], + if numpy.any(combined_thresh): + fpsq = scipy.ndimage.sum( + cropped_image_1[combined_thresh] ** 2, ) - / pdt - ) - # leave in for now given they are not exported but still calculated - K1 = scipy.ndimage.sum( - cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], - ) / (numpy.array(fpsq)) - K2 = scipy.ndimage.sum( - cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], - ) / (numpy.array(spsq)) - if K1 == K2: - pass + spsq = scipy.ndimage.sum( + cropped_image_2[combined_thresh] ** 2, + ) + pdt = numpy.sqrt(numpy.array(fpsq) * numpy.array(spsq)) + overlap = ( + scipy.ndimage.sum( + cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], + ) + / pdt + ) + K1 = scipy.ndimage.sum( + cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], + ) / (numpy.array(fpsq)) + K2 = scipy.ndimage.sum( + cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], + ) / (numpy.array(spsq)) + if K1 == K2: + pass + else: + overlap, K1, K2 = 0.0, 0.0, 0.0 # first_pixels, second_pixels = flattened image arrays # combined_thresh = boolean mask of pixels above threshold in both channels @@ -475,13 +478,13 @@ def calculate_colocalization( # noqa: PLR0912, PLR0915 scale = UINT8_MAX if fast_costes == "Accurate": - thr_first_image_c, thr_second_image_c = bisection_costes_threshold_calculation( - cropped_image_1, cropped_image_2, scale - ) - else: thr_first_image_c, thr_second_image_c = linear_costes_threshold_calculation( cropped_image_1, cropped_image_2, scale, fast_costes ) + else: + thr_first_image_c, thr_second_image_c = bisection_costes_threshold_calculation( + cropped_image_1, cropped_image_2, scale + ) # Costes' thershold for entire image is applied to each object first_image_above_thr = cropped_image_1 > thr_first_image_c @@ -566,8 +569,8 @@ def compute_colocalization( # noqa: C901, PLR0912 colocalization_features = calculate_colocalization( cropped_image_1=cropped_image1, cropped_image_2=cropped_image2, - thr=15, - fast_costes="Accurate", + thr=thr, + fast_costes=fast_costes, ) # Build a simple dict row (avoid pandas dependency) diff --git a/src/zedprofiler/featurization/granularity.py b/src/zedprofiler/featurization/granularity.py index 31c9717..f53681e 100644 --- a/src/zedprofiler/featurization/granularity.py +++ b/src/zedprofiler/featurization/granularity.py @@ -104,9 +104,12 @@ def _upsample_3d( k, i, j = numpy.mgrid[ 0 : original_shape[0], 0 : original_shape[1], 0 : original_shape[2] ].astype(float) - k *= float(subsampled_shape[0] - 1) / float(original_shape[0] - 1) - i *= float(subsampled_shape[1] - 1) / float(original_shape[1] - 1) - j *= float(subsampled_shape[2] - 1) / float(original_shape[2] - 1) + if original_shape[0] > 1: + k *= float(subsampled_shape[0] - 1) / float(original_shape[0] - 1) + if original_shape[1] > 1: + i *= float(subsampled_shape[1] - 1) / float(original_shape[1] - 1) + if original_shape[2] > 1: + j *= float(subsampled_shape[2] - 1) / float(original_shape[2] - 1) return scipy.ndimage.map_coordinates(data, (k, i, j), order=1) @@ -283,9 +286,12 @@ def compute_granularity( # noqa: C901, PLR0912, PLR0913, PLR0915 k, i, j = numpy.mgrid[ 0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2] ].astype(float) - k *= float(back_shape[0] - 1) / float(new_shape[0] - 1) - i *= float(back_shape[1] - 1) / float(new_shape[1] - 1) - j *= float(back_shape[2] - 1) / float(new_shape[2] - 1) + if new_shape[0] > 1: + k *= float(back_shape[0] - 1) / float(new_shape[0] - 1) + if new_shape[1] > 1: + i *= float(back_shape[1] - 1) / float(new_shape[1] - 1) + if new_shape[2] > 1: + j *= float(back_shape[2] - 1) / float(new_shape[2] - 1) back_pixels = scipy.ndimage.map_coordinates(back_pixels, (k, i, j), order=1) # Subtract background diff --git a/src/zedprofiler/featurization/intensity.py b/src/zedprofiler/featurization/intensity.py index def6976..29b8140 100644 --- a/src/zedprofiler/featurization/intensity.py +++ b/src/zedprofiler/featurization/intensity.py @@ -80,29 +80,34 @@ def compute_intensity( # noqa: PLR0915 # Extract only coordinates where object exists z_indices, y_indices, x_indices = numpy.where(selected_label_object > 0) - min_z, max_z = numpy.min(z_indices), numpy.max(z_indices) - min_y, max_y = numpy.min(y_indices), numpy.max(y_indices) - min_x, max_x = numpy.min(x_indices), numpy.max(x_indices) + bbox_min_z, bbox_max_z = numpy.min(z_indices), numpy.max(z_indices) + bbox_min_y, bbox_max_y = numpy.min(y_indices), numpy.max(y_indices) + bbox_min_x, bbox_max_x = numpy.min(x_indices), numpy.max(x_indices) # Crop to bounding box for efficiency cropped_label = selected_label_object[ - min_z : max_z + 1, min_y : max_y + 1, min_x : max_x + 1 + bbox_min_z : bbox_max_z + 1, + bbox_min_y : bbox_max_y + 1, + bbox_min_x : bbox_max_x + 1, ] cropped_image = selected_image_object[ - min_z : max_z + 1, min_y : max_y + 1, min_x : max_x + 1 + bbox_min_z : bbox_max_z + 1, + bbox_min_y : bbox_max_y + 1, + bbox_min_x : bbox_max_x + 1, ] # Create coordinate grids for the bounding box mesh_z, mesh_y, mesh_x = numpy.mgrid[ - min_z : max_z + 1, # + 1 to include the max index - min_y : max_y + 1, - min_x : max_x + 1, + bbox_min_z : bbox_max_z + 1, # + 1 to include the max index + bbox_min_y : bbox_max_y + 1, + bbox_min_x : bbox_max_x + 1, ] # calculate the integrated intensity integrated_intensity = scipy.ndimage.sum( selected_image_object, selected_label_object, + index=1, ) # calculate the volume volume = numpy.sum(selected_label_object) @@ -125,10 +130,10 @@ def compute_intensity( # noqa: PLR0915 upper_quartile_intensity = numpy.percentile(non_zero_pixels_object, 75) # median intensity median_intensity = numpy.median(non_zero_pixels_object) - # max intensity location - max_z, max_y, max_x = scipy.ndimage.maximum_position( - selected_image_object, - ) # z, y, x + # location of maximum intensity pixel (z, y, x) + max_intensity_z, max_intensity_y, max_intensity_x = ( + scipy.ndimage.maximum_position(selected_image_object) + ) # Calculate center of mass (geometric center) using cropped arrays object_mask = cropped_label > 0 @@ -177,9 +182,9 @@ def compute_intensity( # noqa: PLR0915 "StdIntensityEdge": std_intensity_edge, "MinIntensityEdge": min_intensity_edge, "MaxIntensityEdge": max_intensity_edge, - "MaxZ": max_z, - "MaxY": max_y, - "MaxX": max_x, + "MaxZ": max_intensity_z, + "MaxY": max_intensity_y, + "MaxX": max_intensity_x, "CMI.X": cmi_x, "CMI.Y": cmi_y, "CMI.Z": cmi_z, diff --git a/src/zedprofiler/featurization/neighbors.py b/src/zedprofiler/featurization/neighbors.py index 3c2e17a..cdfeafb 100644 --- a/src/zedprofiler/featurization/neighbors.py +++ b/src/zedprofiler/featurization/neighbors.py @@ -6,6 +6,7 @@ import numpy import pandas import skimage.measure +import skimage.morphology from zedprofiler.contracts import validate_column_name_schema from zedprofiler.IO.feature_writing_utils import format_morphology_feature_name @@ -138,8 +139,6 @@ def compute_neighbors( props_label["bbox-4"][0], props_label["bbox-5"][0], ) - original_bbox = (z_min, y_min, x_min, z_max, y_max, x_max) - new_z_min, new_z_max = neighbors_expand_box( min_coor=image_global_min_coord_z, max_coord=image_global_max_coord_z, @@ -163,18 +162,12 @@ def compute_neighbors( ) bbox = (new_z_min, new_y_min, new_x_min, new_z_max, new_y_max, new_x_max) croppped_neighbor_image = crop_3D_image(image=label_object, bbox=bbox) - self_cropped_neighbor_image = crop_3D_image( - image=label_object, bbox=original_bbox - ) - # find all the unique values in the cropped image of the object of interest - # this is the number of neighbors in the cropped image - n_neighbors_adjacent = ( - len( - numpy.unique( - self_cropped_neighbor_image[self_cropped_neighbor_image > 0] - ) - ) - - 1 + binary_mask = label_object == label + dilated_mask = skimage.morphology.dilation(binary_mask) + labels_in_dilation = label_object[dilated_mask] + adjacent_labels = numpy.unique(labels_in_dilation) + n_neighbors_adjacent = int( + numpy.sum((adjacent_labels != 0) & (adjacent_labels != label)) ) # find all the unique values in the expanded cropped image of the diff --git a/tests/featurization/test_colocalization.py b/tests/featurization/test_colocalization.py index 8c5e26d..7a2f205 100644 --- a/tests/featurization/test_colocalization.py +++ b/tests/featurization/test_colocalization.py @@ -1,5 +1,7 @@ from __future__ import annotations +import unittest.mock + import numpy as np import pandas as pd import pytest @@ -102,6 +104,87 @@ def test_prepare_two_images_for_colocalization_crops() -> None: assert cropped2.max() >= expected_peak_im2 +def test_combined_thresh_does_not_raise_unbound_local_error() -> None: + """combined_thresh must be bound even when images are empty (Bug 2). + + Before the fix, combined_thresh was only assigned inside the try-else block, + so when numpy.max raised ValueError on an empty crop, the subsequent + ``if numpy.any(combined_thresh)`` reference raised UnboundLocalError. + """ + empty = np.zeros((0,), dtype=float) + try: + calculate_colocalization(empty, empty, thr=15, fast_costes="Accurate") + except UnboundLocalError as e: + pytest.fail(f"UnboundLocalError raised — combined_thresh not initialised: {e}") + except Exception: + pass # any other exception (ValueError, ZeroDivisionError) is acceptable + + +def test_accurate_mode_calls_linear_not_bisection() -> None: + """fast_costes='Accurate' must route to linear_costes, not bisection (Bug 3). + + The two algorithms can converge to the same threshold for some inputs, so + the test uses unittest.mock.patch to spy on which function is actually called + rather than comparing numeric results. + """ + rng = np.random.default_rng(42) + img = rng.uniform(0, 255, (8, 8, 8)).astype(float) + + with ( + unittest.mock.patch( + "zedprofiler.featurization.colocalization.linear_costes_threshold_calculation", + wraps=linear_costes_threshold_calculation, + ) as mock_linear, + unittest.mock.patch( + "zedprofiler.featurization.colocalization.bisection_costes_threshold_calculation", + wraps=bisection_costes_threshold_calculation, + ) as mock_bisection, + ): + calculate_colocalization(img, img, thr=15, fast_costes="Accurate") + + assert mock_linear.called, ( + "linear_costes_threshold_calculation was not called for fast_costes='Accurate'" + ) + assert not mock_bisection.called, ( + "bisection_costes_threshold_calculation was called for fast_costes='Accurate'" + ) + + +def test_compute_colocalization_respects_fast_costes_parameter() -> None: + """compute_colocalization must forward fast_costes to calculate_colocalization. + + Bug B (ZedProfiler-specific): + + Before the fix, the inner call hard-coded fast_costes='Accurate', so the + caller's value was silently ignored. The test passes fast_costes='Fast' and + checks via mock that the right algorithm (bisection) is invoked. + """ + imgset = ImageSetLoaderModel() + shape = (7, 7, 7) + center = (3, 3, 3) + label, im1, im2 = make_pair(shape, center) + loader = TwoObjectLoaderModel( + image_set_loader=imgset, + compartment="Cell", + image1=im1, + image2=im2, + label_image=label, + object_ids=[1], + ) + + with unittest.mock.patch( + "zedprofiler.featurization.colocalization.bisection_costes_threshold_calculation", + wraps=bisection_costes_threshold_calculation, + ) as mock_bisection: + compute_colocalization(loader, channel1="A", channel2="B", fast_costes="Fast") + + assert mock_bisection.called, ( + "bisection_costes_threshold_calculation was not called for " + "fast_costes='Fast' — compute_colocalization may still be " + "hard-coding fast_costes='Accurate'" + ) + + def test_calculate_colocalization_identical_images() -> None: # identical images should give high correlation and Manders near 1 rng = np.random.default_rng(0) diff --git a/tests/featurization/test_granularity.py b/tests/featurization/test_granularity.py index b1ac407..465a2a4 100644 --- a/tests/featurization/test_granularity.py +++ b/tests/featurization/test_granularity.py @@ -159,6 +159,50 @@ class Dummy: assert isinstance(df, pd.DataFrame) +def test_upsample_3d_no_division_by_zero_when_dim_is_one() -> None: + """_upsample_3d must not raise ZeroDivisionError when any dim has size 1 (Bug 7). + + Before the fix, the per-axis scale factor was always computed as + ``(subsampled_shape[k] - 1) / (original_shape[k] - 1)`` without guarding + against ``original_shape[k] == 1``, which produces a zero denominator. + """ + data = np.ones((1, 4, 4), dtype=float) + try: + result = _upsample_3d(data, data.shape, (1, 8, 8)) + except ZeroDivisionError as e: + pytest.fail(f"ZeroDivisionError in _upsample_3d with dim-1 axis: {e}") + assert result.shape == (1, 8, 8) + + +def test_granularity_no_crash_on_single_z_slice() -> None: + """compute_granularity must not crash when the input has only one Z slice (Bug 7). + + The division-by-zero guard must also protect the background upsampling block + inside compute_granularity, not just _upsample_3d. + """ + shape = (1, 12, 12) + img = np.zeros(shape, dtype=float) + lab = np.zeros(shape, dtype=int) + img[0, 5, 5] = 10.0 + lab[0, 5, 5] = 1 + + class Dummy: + image = img + label_image = lab + object_ids: ClassVar[list[int]] = [1] + image_set_loader = type("ISL", (), {"image_set_name": "s"})() + compartment = "Cell" + channel = "Ch1" + + try: + df = compute_granularity(Dummy(), radius=1, granular_spectrum_length=3) + except ZeroDivisionError as e: + pytest.fail( + f"ZeroDivisionError in compute_granularity with single-Z image: {e}" + ) + assert isinstance(df, pd.DataFrame) + + def test_compute_granularity_preserves_sparse_label_ids() -> None: # Sparse labels should not be renumbered to 1..n internally. shape = (8, 8, 8) diff --git a/tests/featurization/test_intensity.py b/tests/featurization/test_intensity.py index 3728493..0cc9035 100644 --- a/tests/featurization/test_intensity.py +++ b/tests/featurization/test_intensity.py @@ -86,6 +86,54 @@ def test_min_intensity_edge_not_zero_for_bright_cell() -> None: ) +def test_integrated_intensity_is_per_object_not_global() -> None: + """IntegratedIntensity must reflect only the target object's voxels (Bug 10). + + Before the fix, scipy.ndimage.sum was called without an explicit ``index`` + argument. With a binarised label array (values 0/1), omitting ``index`` + causes ndimage.sum to return the total over all labelled pixels, which can + include voxels from other objects if the label array was not fully isolated + beforehand. Passing ``index=1`` constrains the sum to only the pixels + belonging to label 1. + + The test constructs an image where one object (label 1, intensity 1.0) is + isolated from a second brighter region that is masked out. Without the fix, + the sum bleeds into the second region and the integrated intensity is too + large. + """ + shape = (10, 10, 10) + image = np.zeros(shape, dtype=np.float32) + label = np.zeros(shape, dtype=np.int32) + + # Object 1: single bright voxel, intensity 1.0 + image[2, 2, 2] = 1.0 + label[2, 2, 2] = 1 + + # A second region that is NOT labelled (background) but has high intensity. + # If scipy.ndimage.sum leaks into it, IntegratedIntensity will be inflated. + image[7, 7, 7] = 100.0 + + imgset = ImageSetLoaderModel() + loader = ObjectLoaderModel( + image=image, + label_image=label, + object_ids=[1], + image_set_loader=imgset, + ) + + df = compute_intensity(loader) + ii_col = [c for c in df.columns if "IntegratedIntensity" in c and "Edge" not in c] + assert ii_col, "IntegratedIntensity column not found in output" + + row = df[df["Metadata_Object_ObjectID"] == 1] + ii = float(row[ii_col[0]].values[0]) + expected = 1.0 + assert np.isclose(ii, expected, atol=1e-3), ( + f"IntegratedIntensity = {ii}, expected {expected}. " + "Likely caused by scipy.ndimage.sum without index=1 summing all pixels." + ) + + @pytest.mark.parametrize("shape,center", [((6, 6, 6), (3, 3, 3))]) def test_compute_intensity_basic( shape: tuple[int, int, int], center: tuple[int, int, int] diff --git a/tests/featurization/test_neighbors.py b/tests/featurization/test_neighbors.py index cb5b3f3..3d0ee18 100644 --- a/tests/featurization/test_neighbors.py +++ b/tests/featurization/test_neighbors.py @@ -128,6 +128,44 @@ def test_mahalanobis_small_and_regularized_and_singular() -> None: assert np.allclose(md_sing, 0.0) +def test_neighbors_count_adjacent_detects_touching_cells() -> None: + """NeighborsCountAdjacent must be > 0 for two directly touching objects (Bug 5). + + Before the fix, adjacency was detected by cropping to the regionprops bbox + of each object and counting unique labels inside. Because regionprops uses + exclusive-max indices (Python slice convention), the boundary voxel of the + touching neighbour was excluded from the crop, making the count always 0. + + The fix replaces the bbox-crop approach with a 1-voxel morphological dilation + of the object mask; any label in the dilated region that is not the object + itself is a true neighbour. + """ + shape = (10, 10, 10) + lab = np.zeros(shape, dtype=int) + # Object 1 occupies [2:5, 2:5, 2:5]; Object 2 occupies [5:8, 2:5, 2:5]. + # They share the plane at z=5, so they are face-adjacent (touching). + lab[2:5, 2:5, 2:5] = 1 + lab[5:8, 2:5, 2:5] = 2 + + imgset = ImageSetLoaderModel() + loader = ObjectLoaderModel( + label_image=lab, + object_ids=[1, 2], + image_set_loader=imgset, + ) + + df = compute_neighbors(loader, distance_threshold=5, anisotropy_factor=1) + obj1_row = df[df["Metadata_Object_ObjectID"] == 1] + adjacent_col = [c for c in df.columns if "NeighborsCountAdjacent" in c] + assert adjacent_col, "NeighborsCountAdjacent column not found in output" + + n_adj = int(obj1_row[adjacent_col[0]].values[0]) + assert n_adj > 0, ( + "NeighborsCountAdjacent is 0 for two touching cells — " + "likely caused by bbox-crop excluding the shared boundary voxel" + ) + + def test_create_results_dataframe_and_errors_and_plots() -> None: # create a simple classification results dict results = { From 5c231c2ddd75f35b88f7b4c08a04271b44180cc0 Mon Sep 17 00:00:00 2001 From: Gregory Way Date: Thu, 25 Jun 2026 05:26:25 -0600 Subject: [PATCH 04/11] feat: add three-mode Costes dispatch and fix bisection unit mismatch Adds "Fast" as a distinct third mode (linear with adaptive skipping) matching CellProfiler's Accurate/Fast/Faster nomenclature. Previously "Fast" routed to bisection -- it now routes to linear_costes with fast_costes="Fast", and "Faster" routes to bisection. Fixes bisection_costes_threshold_calculation returning thresholds normalised to [0,1] (mid/scale_max) instead of raw pixel units. The outer dispatch's `image > thr` comparison requires raw values, so this was silently bypassing Costes thresholding in "Faster" mode. CellProfiler's library has the same bug; this is an intentional divergence, documented in a comment. Also updates both docstrings to describe all three modes, and adds value-based tests: convergence across all three modes, low threshold for correlated images, high threshold for anti-correlated images (linear only), and a documented test for bisection's degenerate anti-correlated edge case. Co-Authored-By: Claude Sonnet 4.6 --- .../featurization/colocalization.py | 48 ++++-- tests/featurization/test_colocalization.py | 137 +++++++++++++++++- 2 files changed, 169 insertions(+), 16 deletions(-) diff --git a/src/zedprofiler/featurization/colocalization.py b/src/zedprofiler/featurization/colocalization.py index 9d27e57..59dced5 100644 --- a/src/zedprofiler/featurization/colocalization.py +++ b/src/zedprofiler/featurization/colocalization.py @@ -207,7 +207,11 @@ def bisection_costes_threshold_calculation( valid = 1 while lastmid != mid: - thr_first_image_c = mid / scale_max + # Use raw pixel units (not normalised) so the threshold is comparable + # with linear_costes_threshold_calculation and with the outer dispatch's + # `image > thr` comparison. CellProfiler's library has the same + # mid/scale_max normalisation bug; this is an intentional divergence. + thr_first_image_c = float(mid) thr_second_image_c = (a * thr_first_image_c) + b combt = (first_image < thr_first_image_c) | (second_image < thr_second_image_c) if numpy.count_nonzero(combt) <= MIN_PEARSON_POINTS: @@ -232,7 +236,7 @@ def bisection_costes_threshold_calculation( else: mid = ((right - left) // 2) + left - thr_first_image_c = (valid - 1) / scale_max + thr_first_image_c = float(valid - 1) thr_second_image_c = (a * thr_first_image_c) + b return thr_first_image_c, thr_second_image_c @@ -302,7 +306,7 @@ def prepare_two_images_for_colocalization( # noqa: PLR0913 return cropped_image_1, cropped_image_2 -def calculate_colocalization( # noqa: PLR0912, PLR0915 +def calculate_colocalization( # noqa: PLR0912, PLR0915, C901 cropped_image_1: numpy.ndarray, cropped_image_2: numpy.ndarray, thr: int = 15, @@ -323,9 +327,13 @@ def calculate_colocalization( # noqa: PLR0912, PLR0915 The threshold for the Manders' coefficients, by default 15 fast_costes : str, optional The mode for Costes' threshold calculation, by default "Accurate". - Options are "Accurate" or "Fast". - "Accurate" uses a linear algorithm, while "Fast" uses a bisection algorithm. - The "Fast" mode is faster but less accurate. + Options are "Accurate", "Fast", or "Faster" (matching CellProfiler's + three Costes methods). "Accurate" tests every threshold value using a + linear scan (slowest, most precise). "Fast" uses the same linear scan + but skips candidate thresholds when the Pearson R is far from the + crossing point (faster, slightly less precise). "Faster" uses a + bisection algorithm and is substantially faster for 16-bit images + (least precise). Returns ------- @@ -479,11 +487,23 @@ def calculate_colocalization( # noqa: PLR0912, PLR0915 if fast_costes == "Accurate": thr_first_image_c, thr_second_image_c = linear_costes_threshold_calculation( - cropped_image_1, cropped_image_2, scale, fast_costes + first_image=cropped_image_1, + second_image=cropped_image_2, + scale_max=scale, + fast_costes="Accurate", ) - else: + elif fast_costes == "Fast": + thr_first_image_c, thr_second_image_c = linear_costes_threshold_calculation( + first_image=cropped_image_1, + second_image=cropped_image_2, + scale_max=scale, + fast_costes="Fast", + ) + else: # "Faster" thr_first_image_c, thr_second_image_c = bisection_costes_threshold_calculation( - cropped_image_1, cropped_image_2, scale + first_image=cropped_image_1, + second_image=cropped_image_2, + scale_max=scale, ) # Costes' thershold for entire image is applied to each object @@ -540,9 +560,13 @@ def compute_colocalization( # noqa: C901, PLR0912 The threshold for the Manders' coefficients, by default 15 fast_costes : str, optional The mode for Costes' threshold calculation, by default "Accurate". - Options are "Accurate" or "Fast". - "Accurate" uses a linear algorithm, while "Fast" uses a bisection algorithm. - The "Fast" mode is faster but less accurate. + Options are "Accurate", "Fast", or "Faster" (matching CellProfiler's + three Costes methods). "Accurate" tests every threshold value using a + linear scan (slowest, most precise). "Fast" uses the same linear scan + but skips candidate thresholds when the Pearson R is far from the + crossing point (faster, slightly less precise). "Faster" uses a + bisection algorithm and is substantially faster for 16-bit images + (least precise). channel1 : str | None, optional The name of the first channel, used for feature naming, by default None channel2 : str | None, optional diff --git a/tests/featurization/test_colocalization.py b/tests/featurization/test_colocalization.py index 7a2f205..deda149 100644 --- a/tests/featurization/test_colocalization.py +++ b/tests/featurization/test_colocalization.py @@ -156,8 +156,8 @@ def test_compute_colocalization_respects_fast_costes_parameter() -> None: Bug B (ZedProfiler-specific): Before the fix, the inner call hard-coded fast_costes='Accurate', so the - caller's value was silently ignored. The test passes fast_costes='Fast' and - checks via mock that the right algorithm (bisection) is invoked. + caller's value was silently ignored. The test passes fast_costes='Faster' and + checks via mock that bisection is invoked (the only mode that reaches bisection). """ imgset = ImageSetLoaderModel() shape = (7, 7, 7) @@ -176,15 +176,144 @@ def test_compute_colocalization_respects_fast_costes_parameter() -> None: "zedprofiler.featurization.colocalization.bisection_costes_threshold_calculation", wraps=bisection_costes_threshold_calculation, ) as mock_bisection: - compute_colocalization(loader, channel1="A", channel2="B", fast_costes="Fast") + compute_colocalization(loader, channel1="A", channel2="B", fast_costes="Faster") assert mock_bisection.called, ( "bisection_costes_threshold_calculation was not called for " - "fast_costes='Fast' — compute_colocalization may still be " + "fast_costes='Faster' — compute_colocalization may still be " "hard-coding fast_costes='Accurate'" ) +@pytest.fixture() +def high_contrast_images() -> tuple[np.ndarray, np.ndarray]: + """Two 1-D images with a bright correlated signal well above background.""" + rng = np.random.default_rng(0) + background = rng.uniform(0, 50, 300) + signal_mask = np.zeros(300, dtype=bool) + signal_mask[100:150] = True + img1 = background.copy() + img1[signal_mask] = 200.0 + img2 = background.copy() + img2[signal_mask] = 180.0 + return img1, img2 + + +def test_all_costes_modes_converge_on_same_threshold( + high_contrast_images: tuple[np.ndarray, np.ndarray], +) -> None: + """All three modes must agree to within 15 pixel units on realistic images.""" + img1, img2 = high_contrast_images + thr_accurate, _ = linear_costes_threshold_calculation( + first_image=img1, second_image=img2, scale_max=255, fast_costes="Accurate" + ) + thr_fast, _ = linear_costes_threshold_calculation( + first_image=img1, second_image=img2, scale_max=255, fast_costes="Fast" + ) + thr_faster, _ = bisection_costes_threshold_calculation( + first_image=img1, second_image=img2, scale_max=255 + ) + tolerance = 15 + assert abs(thr_accurate - thr_fast) <= tolerance, ( + f"Accurate ({thr_accurate:.1f}) and Fast ({thr_fast:.1f}) diverged " + f"by more than {tolerance}" + ) + assert abs(thr_accurate - thr_faster) <= tolerance, ( + f"Accurate ({thr_accurate:.1f}) and Faster ({thr_faster:.1f}) diverged " + f"by more than {tolerance}" + ) + + +def test_costes_threshold_low_for_perfectly_correlated_images() -> None: + """All three modes must return a low threshold for perfectly correlated images.""" + img = np.linspace(10, 200, 300) + thr_accurate, _ = linear_costes_threshold_calculation( + first_image=img, second_image=img, scale_max=255, fast_costes="Accurate" + ) + thr_fast, _ = linear_costes_threshold_calculation( + first_image=img, second_image=img, scale_max=255, fast_costes="Fast" + ) + thr_faster, _ = bisection_costes_threshold_calculation( + first_image=img, second_image=img, scale_max=255 + ) + max_expected = 15 # near zero in 0-255 space + assert thr_accurate < max_expected, ( + f"Expected threshold near 0 for fully correlated images, got {thr_accurate:.3f}" + ) + assert thr_fast < max_expected, ( + f"Expected threshold near 0 for fully correlated images, got {thr_fast:.3f}" + ) + assert thr_faster < max_expected, ( + f"Expected threshold near 0 for fully correlated images, got {thr_faster:.3f}" + ) + + +def test_costes_threshold_high_for_anticorrelated_images_linear() -> None: + """Linear modes must return a threshold near max for anti-correlated images. + + Note: the bisection algorithm's degenerate behaviour for purely anti-correlated + inputs (returns 0 rather than scale_max) is documented separately in + test_bisection_degenerate_anticorrelation_returns_zero. + """ + img1 = np.linspace(0, 255, 300) + img2 = np.linspace(255, 0, 300) + thr_accurate, _ = linear_costes_threshold_calculation( + first_image=img1, second_image=img2, scale_max=255, fast_costes="Accurate" + ) + thr_fast, _ = linear_costes_threshold_calculation( + first_image=img1, second_image=img2, scale_max=255, fast_costes="Fast" + ) + min_expected = 200 + assert thr_accurate > min_expected, ( + "Expected threshold near max for anti-correlated images, " + f"got {thr_accurate:.3f}" + ) + assert thr_fast > min_expected, ( + f"Expected threshold near max for anti-correlated images, got {thr_fast:.3f}" + ) + + +def test_bisection_degenerate_anticorrelation_returns_zero() -> None: + """Document bisection's edge-case behaviour for purely anti-correlated images. + + When Pearson R is negative for every candidate threshold, valid is never updated + from its initial value of 1, so the return is valid - 1 = 0. This matches + CellProfiler's library behaviour and is a known limitation for this degenerate case. + """ + img1 = np.linspace(0, 255, 300) + img2 = np.linspace(255, 0, 300) + thr, _ = bisection_costes_threshold_calculation( + first_image=img1, second_image=img2, scale_max=255 + ) + assert thr == 0.0, ( + f"Expected bisection to return 0 for fully anti-correlated images " + f"(degenerate case), got {thr}" + ) + + +def test_faster_mode_calls_bisection_not_linear() -> None: + """fast_costes='Faster' must route to bisection, not linear.""" + rng = np.random.default_rng(42) + img = rng.uniform(0, 255, (8, 8, 8)).astype(float) + with ( + unittest.mock.patch( + "zedprofiler.featurization.colocalization.linear_costes_threshold_calculation", + wraps=linear_costes_threshold_calculation, + ) as mock_linear, + unittest.mock.patch( + "zedprofiler.featurization.colocalization.bisection_costes_threshold_calculation", + wraps=bisection_costes_threshold_calculation, + ) as mock_bisection, + ): + calculate_colocalization(img, img, thr=15, fast_costes="Faster") + assert mock_bisection.called, ( + "bisection_costes_threshold_calculation was not called for fast_costes='Faster'" + ) + assert not mock_linear.called, ( + "linear_costes_threshold_calculation was called for fast_costes='Faster'" + ) + + def test_calculate_colocalization_identical_images() -> None: # identical images should give high correlation and Manders near 1 rng = np.random.default_rng(0) From 7db2c4690abd4a71b53cbd0857d28fad180dd56a Mon Sep 17 00:00:00 2001 From: Gregory Way Date: Thu, 25 Jun 2026 05:34:28 -0600 Subject: [PATCH 05/11] ci: replace static coverage badge with dynamic Codecov badge - Switch README badge from a hardcoded shields.io percentage to a live Codecov badge that updates automatically on every push - Add coverage XML upload to the run_tests matrix job via codecov/codecov-action@v5 (no token needed for public repos) - Remove the coverage_and_badge job and the stale-badge check entirely One-time setup: activate the repo at codecov.io/gh/WayScience/ZedProfiler. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/run-tests.yml | 29 ++++++----------------------- README.md | 2 +- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index dbb0887..6149934 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -44,27 +44,10 @@ jobs: python-version: ${{ matrix.python_version }} - name: Install the latest version of uv uses: astral-sh/setup-uv@v7 - - name: Run pytest - run: uv run --frozen pytest - coverage_and_badge: - runs-on: ubuntu-24.04 - steps: - - name: Checkout - uses: actions/checkout@v6 - - name: Python setup - uses: actions/setup-python@v6 + - name: Run pytest with coverage + run: uv run --frozen pytest --cov=zedprofiler --cov-report=xml + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v5 with: - python-version: "3.13" - - name: Install the latest version of uv - uses: astral-sh/setup-uv@v7 - - name: Install just - run: sudo apt-get update && sudo apt-get install -y just - - name: Run coverage check and update badge - run: just coverage-check - - name: Verify coverage badge is up to date - run: | - git diff --exit-code README.md || { - echo "README coverage badge is out of date."; - echo "Run 'just coverage-check' and commit changes."; - exit 1; - } + files: coverage.xml + flags: ${{ matrix.os }}-py${{ matrix.python_version }} diff --git a/README.md b/README.md index 47a5ff8..eecdb50 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# ZEDprofiler [![Documentation](https://img.shields.io/badge/documentation-available-brightgreen)](https://zedprofiler.readthedocs.io/en/latest/) ![License](https://img.shields.io/badge/license-BSD%203--Clause-blue)[![Coverage](https://img.shields.io/badge/coverage-92%25-brightgreen)](#quality-gates) +# ZEDprofiler [![Documentation](https://img.shields.io/badge/documentation-available-brightgreen)](https://zedprofiler.readthedocs.io/en/latest/) ![License](https://img.shields.io/badge/license-BSD%203--Clause-blue)[![Coverage](https://codecov.io/gh/WayScience/ZedProfiler/branch/main/graph/badge.svg)](https://codecov.io/gh/WayScience/ZedProfiler) [![ZEDprofiler](https://github.com/WayScience/ZEDprofiler/raw/main/logo/with-text-for-dark-bg.png)](https://github.com/WayScience/ZEDprofiler) From 87a178f0711b79e72bb70f8e094c5e125053c64f Mon Sep 17 00:00:00 2001 From: Gregory Way Date: Thu, 25 Jun 2026 05:34:48 -0600 Subject: [PATCH 06/11] ci: add CODECOV_TOKEN to coverage upload step Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/run-tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 6149934..5302e89 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -46,8 +46,9 @@ jobs: uses: astral-sh/setup-uv@v7 - name: Run pytest with coverage run: uv run --frozen pytest --cov=zedprofiler --cov-report=xml - - name: Upload coverage to Codecov + - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v5 with: + token: ${{ secrets.CODECOV_TOKEN }} files: coverage.xml flags: ${{ matrix.os }}-py${{ matrix.python_version }} From 1257b40248237462137232c78a42a1eb2c593647 Mon Sep 17 00:00:00 2001 From: Gregory Way Date: Thu, 25 Jun 2026 05:37:27 -0600 Subject: [PATCH 07/11] ci: move coverage upload to dedicated single job Running coverage on the full matrix (8 combinations) was redundant. Coverage is now measured once on ubuntu-24.04 + Python 3.13 in a separate job, and the matrix run_tests job is back to plain pytest. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/run-tests.yml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 5302e89..84b66b8 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -44,6 +44,19 @@ jobs: python-version: ${{ matrix.python_version }} - name: Install the latest version of uv uses: astral-sh/setup-uv@v7 + - name: Run pytest + run: uv run --frozen pytest + coverage: + runs-on: ubuntu-24.04 + steps: + - name: Checkout + uses: actions/checkout@v6 + - name: Python setup + uses: actions/setup-python@v6 + with: + python-version: "3.13" + - name: Install the latest version of uv + uses: astral-sh/setup-uv@v7 - name: Run pytest with coverage run: uv run --frozen pytest --cov=zedprofiler --cov-report=xml - name: Upload coverage reports to Codecov @@ -51,4 +64,3 @@ jobs: with: token: ${{ secrets.CODECOV_TOKEN }} files: coverage.xml - flags: ${{ matrix.os }}-py${{ matrix.python_version }} From ca1378bcec6b8bd89c74e97a3001936d7db14685 Mon Sep 17 00:00:00 2001 From: Gregory Way Date: Thu, 25 Jun 2026 05:39:29 -0600 Subject: [PATCH 08/11] chore: remove static coverage badge script and its tests Both are obsolete now that the README uses a dynamic Codecov badge. Co-Authored-By: Claude Sonnet 4.6 --- scripts/update_coverage_badge.py | 109 ---------------------------- tests/test_update_coverage_badge.py | 78 -------------------- 2 files changed, 187 deletions(-) delete mode 100644 scripts/update_coverage_badge.py delete mode 100644 tests/test_update_coverage_badge.py diff --git a/scripts/update_coverage_badge.py b/scripts/update_coverage_badge.py deleted file mode 100644 index 4057310..0000000 --- a/scripts/update_coverage_badge.py +++ /dev/null @@ -1,109 +0,0 @@ -"""Update the README coverage badge from a coverage.py XML report.""" - -from __future__ import annotations - -import argparse -import re -import xml.etree.ElementTree as ET -from pathlib import Path - -BADGE_PATTERN = re.compile( - r"\[!\[Coverage\]\(https://img\.shields\.io/badge/coverage-[^)]+\)\]\(#quality-gates\)" -) -BRIGHTGREEN_THRESHOLD = 90 -GREEN_THRESHOLD = 80 -YELLOWGREEN_THRESHOLD = 70 -YELLOW_THRESHOLD = 60 -ORANGE_THRESHOLD = 50 - - -def choose_color(percent: int) -> str: - """Pick a simple badge color based on rounded percentage coverage.""" - if percent >= BRIGHTGREEN_THRESHOLD: - return "brightgreen" - if percent >= GREEN_THRESHOLD: - return "green" - if percent >= YELLOWGREEN_THRESHOLD: - return "yellowgreen" - if percent >= YELLOW_THRESHOLD: - return "yellow" - if percent >= ORANGE_THRESHOLD: - return "orange" - return "red" - - -def read_percent_from_coverage_xml(coverage_file: Path) -> int: - """Parse overall line-rate from coverage XML and return rounded percent.""" - root = ET.parse(coverage_file).getroot() - line_rate = root.attrib.get("line-rate") - if line_rate is None: - msg = f"Missing 'line-rate' attribute in {coverage_file}" - raise ValueError(msg) - - return round(float(line_rate) * 100) - - -def update_readme_badge(readme_file: Path, percent: int) -> bool: - """Update badge in README and return True when file content changed.""" - color = choose_color(percent) - badge = ( - f"[![Coverage](https://img.shields.io/badge/coverage-{percent}%25-{color})]" - "(#quality-gates)" - ) - - original = readme_file.read_text(encoding="utf-8") - - if BADGE_PATTERN.search(original): - updated = BADGE_PATTERN.sub(badge, original, count=1) - else: - lines = original.splitlines() - if not lines: - msg = f"README file is empty: {readme_file}" - raise ValueError(msg) - lines.insert(1, "") - lines.insert(2, badge) - updated = "\n".join(lines) - if original.endswith("\n"): - updated += "\n" - - if updated == original: - return False - - readme_file.write_text(updated, encoding="utf-8") - return True - - -def parse_args() -> argparse.Namespace: - """Parse command line arguments.""" - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - "--coverage-file", - default="coverage.xml", - type=Path, - help="Path to coverage.py XML report (default: coverage.xml)", - ) - parser.add_argument( - "--readme", - default="README.md", - type=Path, - help="Path to README file (default: README.md)", - ) - return parser.parse_args() - - -def main() -> int: - """Update README badge and print a small status message.""" - args = parse_args() - percent = read_percent_from_coverage_xml(args.coverage_file) - changed = update_readme_badge(args.readme, percent) - - if changed: - print(f"Updated coverage badge to {percent}% in {args.readme}") - else: - print(f"Coverage badge already up to date at {percent}%") - - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/tests/test_update_coverage_badge.py b/tests/test_update_coverage_badge.py deleted file mode 100644 index 8cb3c8b..0000000 --- a/tests/test_update_coverage_badge.py +++ /dev/null @@ -1,78 +0,0 @@ -import importlib.util -from pathlib import Path - -import pytest - -# Load the module relative to this test file -_spec = importlib.util.spec_from_file_location( - "update_coverage_badge", - Path(__file__).parent.parent / "scripts" / "update_coverage_badge.py", -) -mod = importlib.util.module_from_spec(_spec) -_spec.loader.exec_module(mod) - - -def test_replaces_existing_badge(tmp_path: Path) -> None: - readme = tmp_path / "README.md" - original = ( - "# Project\n\n" - "[![Coverage](https://img.shields.io/badge/coverage-42%25-red)](#quality-gates)\n\n" - "Some text\n" - ) - readme.write_text(original, encoding="utf-8") - - changed = mod.update_readme_badge(readme, 85) - assert changed is True - - updated = readme.read_text(encoding="utf-8") - assert "coverage-85%25-green" in updated - assert updated.count("[![Coverage]") == 1 - assert "coverage-42%25-red" not in updated - - -def test_inserts_badge_when_missing(tmp_path: Path) -> None: - readme = tmp_path / "README.md" - original = "# Project\nDescription line\n" - readme.write_text(original, encoding="utf-8") - - changed = mod.update_readme_badge(readme, 60) - assert changed is True - - updated = readme.read_text(encoding="utf-8") - lines = updated.splitlines() - # After insertion: title at 0, blank line at 1, badge at 2 - assert lines[0] == "# Project" - assert lines[1] == "" - assert lines[2].startswith("[![Coverage]") - assert "coverage-60%25-yellow" in lines[2] - - -def test_preserves_trailing_newline_on_insert(tmp_path: Path) -> None: - readme = tmp_path / "README.md" - original = "# Project\nDescription line\n" # ends with newline - readme.write_text(original, encoding="utf-8") - - changed = mod.update_readme_badge(readme, 70) - assert changed is True - - updated = readme.read_text(encoding="utf-8") - assert updated.endswith("\n") - - -def test_no_change_returns_false_when_badge_already_exact(tmp_path: Path) -> None: - readme = tmp_path / "README.md" - # percent 75 -> color "yellowgreen" (per thresholds) - badge = "[![Coverage](https://img.shields.io/badge/coverage-75%25-yellowgreen)](#quality-gates)" - original = f"# Project\n\n{badge}\n\nMore\n" - readme.write_text(original, encoding="utf-8") - - changed = mod.update_readme_badge(readme, 75) - assert changed is False - assert readme.read_text(encoding="utf-8") == original - - -def test_empty_readme_raises_value_error(tmp_path: Path) -> None: - readme = tmp_path / "README.md" - readme.write_text("", encoding="utf-8") - with pytest.raises(ValueError): - mod.update_readme_badge(readme, 50) From 840eaefd3a7d0ae0e0ca95dbc56b2e43674dd316 Mon Sep 17 00:00:00 2001 From: Gregory Way Date: Thu, 2 Jul 2026 12:19:36 -0600 Subject: [PATCH 09/11] fix: scope ValueError handler to per-label slice in compute_texture When mahotas.features.haralick raises ValueError (e.g. single-pixel objects with no valid neighbour pairs at the given distance), the except branch was replacing the entire 3D features array with a 1D NaN array, destroying all previously-computed labels and corrupting the output loop. Fix: write NaN only to the failing label's slice (features[:, :, label - 1] = numpy.nan) so other labels are unaffected. Adds regression test: a single-pixel object (reliable ValueError trigger) processed after a normal object must leave the normal object's Haralick values intact. Co-Authored-By: Claude Sonnet 4.6 --- src/zedprofiler/featurization/texture.py | 2 +- tests/featurization/test_texture.py | 52 ++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/src/zedprofiler/featurization/texture.py b/src/zedprofiler/featurization/texture.py index 780d333..357e82d 100644 --- a/src/zedprofiler/featurization/texture.py +++ b/src/zedprofiler/featurization/texture.py @@ -179,7 +179,7 @@ def compute_texture( # noqa: C901 compute_14th_feature=False, ) except ValueError: - features = numpy.full(len(feature_names), numpy.nan, dtype=float) + features[:, :, label - 1] = numpy.nan # iterate through the direction, feature, and object dimensions # of the features array to populate the output dictionary for direction, direction_features in enumerate(features): diff --git a/tests/featurization/test_texture.py b/tests/featurization/test_texture.py index 7de2f3c..48ddd5c 100644 --- a/tests/featurization/test_texture.py +++ b/tests/featurization/test_texture.py @@ -62,6 +62,58 @@ def test_compute_texture_basic( assert "Metadata_Object_ObjectID" in df.columns +def test_value_error_in_one_object_does_not_corrupt_others() -> None: + """When mahotas raises ValueError for one object, other objects must be unaffected. + + Regression test for a bug where the except-ValueError branch executed: + features = numpy.full(len(feature_names), numpy.nan, dtype=float) + This replaced the entire 3D features array with a 1D array, destroying all + previously-computed labels and corrupting the iteration that follows. + + A single-pixel object has no valid pixel pairs at any distance when + ignore_zeros=True, so it reliably triggers ValueError. Object 1 (a normal + 4x4x4 cube) is processed first and must retain its Haralick values even + after object 2 (single pixel) raises ValueError. + """ + shape = (20, 20, 20) + image = np.zeros(shape, dtype=np.uint8) + label = np.zeros(shape, dtype=np.int32) + + image[1:5, 1:5, 1:5] = 200 + label[1:5, 1:5, 1:5] = LABEL_OBJ_1 + + # single pixel — no neighbour pairs → ValueError from mahotas + image[15, 15, 15] = 100 + label[15, 15, 15] = LABEL_OBJ_2 + + imgset = ImageSetLoaderModel() + loader = ObjectLoaderModel( + image=image, + label_image=label, + object_ids=[LABEL_OBJ_1, LABEL_OBJ_2], + image_set_loader=imgset, + ) + + df = compute_texture(loader, distance=1, grayscale=256) + + obj1_rows = df[df["Metadata_Object_ObjectID"] == LABEL_OBJ_1] + assert not obj1_rows.empty, "Object 1 missing from output after object 2 ValueError" + + asm_cols = [c for c in df.columns if "AngularSecondMoment" in c and "-00-" in c] + assert asm_cols, "No AngularSecondMoment direction-00 column found" + + obj1_asm = obj1_rows[asm_cols[0]].iloc[0] + assert not np.isnan(obj1_asm), ( + "Object 1 AngularSecondMoment is NaN after object 2 raised ValueError. " + "The except branch corrupted the shared features array." + ) + + obj2_rows = df[df["Metadata_Object_ObjectID"] == LABEL_OBJ_2] + assert not obj2_rows.empty, "Object 2 missing from output" + obj2_asm = obj2_rows[asm_cols[0]].iloc[0] + assert np.isnan(obj2_asm), "Object 2 (single pixel) should have NaN texture values" + + def test_texture_values_correct_per_object() -> None: """Each object must receive its own Haralick values, not another object's. From 9f7612af9fdab086aa3f81da3d4d712a248502d8 Mon Sep 17 00:00:00 2001 From: Gregory Way Date: Thu, 2 Jul 2026 15:59:44 -0600 Subject: [PATCH 10/11] fill feature vector with nan instead of random --- src/zedprofiler/featurization/texture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zedprofiler/featurization/texture.py b/src/zedprofiler/featurization/texture.py index 357e82d..56f7dfb 100644 --- a/src/zedprofiler/featurization/texture.py +++ b/src/zedprofiler/featurization/texture.py @@ -151,7 +151,7 @@ def compute_texture( # noqa: C901 ) # loop through each label and get the bounding box # to compute features for the object - features = numpy.empty((n_directions, 13, max(labels))) + features = numpy.full((n_directions, 13, max(labels)), numpy.nan) for _, label in enumerate(labels): if int(label) == 0: continue From 154f7ff2087433751692b7e9a5a8543c72c55ac3 Mon Sep 17 00:00:00 2001 From: Gregory Way Date: Sat, 4 Jul 2026 05:21:06 -0600 Subject: [PATCH 11/11] Fix texture output indexing bug for non-sequential object IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same fix as NF1_3D: object IDs may be global (e.g. 257, 514) rather than per-image sequential. The previous code allocated a features array sized to max(label_id) and wrote results to features[:, :, label-1], but then read back via zip(labels, feature) which reads from positions 0, 1, 2... — the wrong slots. Computed texture values were silently discarded and cells received uninitialized memory values instead. Fix: build a label_to_idx mapping so the features array is sized to len(labels) and both writes and reads use the same positional index. This is correct for any label ID scheme (sequential, global, or sparse). Co-Authored-By: Claude Sonnet 4.6 --- src/zedprofiler/featurization/texture.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/zedprofiler/featurization/texture.py b/src/zedprofiler/featurization/texture.py index 56f7dfb..511389f 100644 --- a/src/zedprofiler/featurization/texture.py +++ b/src/zedprofiler/featurization/texture.py @@ -151,10 +151,12 @@ def compute_texture( # noqa: C901 ) # loop through each label and get the bounding box # to compute features for the object - features = numpy.full((n_directions, 13, max(labels)), numpy.nan) - for _, label in enumerate(labels): + label_to_idx = {int(lbl): i for i, lbl in enumerate(labels)} + features = numpy.full((n_directions, 13, len(labels)), numpy.nan) + for label in labels: if int(label) == 0: continue + idx = label_to_idx[int(label)] bbox = label_to_bbox.get(int(label)) if bbox is None: continue @@ -172,25 +174,27 @@ def compute_texture( # noqa: C901 try: # calculates 13 Haralick features for each direction (13) # and each object, and stores them in a 3D array - features[:, :, label - 1] = mahotas.features.haralick( + features[:, :, idx] = mahotas.features.haralick( ignore_zeros=True, f=image_object, distance=distance, compute_14th_feature=False, ) except ValueError: - features[:, :, label - 1] = numpy.nan + features[:, :, idx] = numpy.nan # iterate through the direction, feature, and object dimensions # of the features array to populate the output dictionary for direction, direction_features in enumerate(features): direction_str = f"{direction:02d}" for feature_name, feature in zip(feature_names, direction_features): - for object_id, feature_value in zip(labels, feature): - output_texture_dict["Metadata_Object_ObjectID"].append(object_id) + for label in labels: + output_texture_dict["Metadata_Object_ObjectID"].append(label) output_texture_dict["texture_name"].append( f"{feature_name}-{distance}-{direction_str}-{grayscale}" ) - output_texture_dict["texture_value"].append(feature_value) + output_texture_dict["texture_value"].append( + feature[label_to_idx[int(label)]] + ) final_df = pandas.DataFrame(output_texture_dict) final_df = final_df.pivot(