[RNE Rewrite] feat: add semantic segmentation task#1275
Conversation
|
Two models are missing from |
Yeah, v0.10.0 sounds ok. |
| if (dst->dtype_ != rnexecutorch::core::types::DType::uint8) { | ||
| throw jsi::JSError(rt, "applyColormap: dst must be uint8"); | ||
| } | ||
| if (dst->numel_ != src->numel_ * 4) { |
There was a problem hiding this comment.
I guess the literal 4 could be replaced, here and in other places in this file, with a named constant such as constexpr size_t numChannels = 4;.
| dstData[i * 4 + 0] = lut[idx][0]; | ||
| dstData[i * 4 + 1] = lut[idx][1]; | ||
| dstData[i * 4 + 2] = lut[idx][2]; | ||
| dstData[i * 4 + 3] = lut[idx][3]; |
There was a problem hiding this comment.
Maybe use this one when you introduce numChannels? The loop will be unrolled automatically because numChannels is constexpr. Just fix the formatting.
| dstData[i * 4 + 0] = lut[idx][0]; | |
| dstData[i * 4 + 1] = lut[idx][1]; | |
| dstData[i * 4 + 2] = lut[idx][2]; | |
| dstData[i * 4 + 3] = lut[idx][3]; | |
| for (size_t c = 0; c < numChannels; ++c) { | |
| dstData[i * numChannels + c] = lut[idx][c]; | |
| } |
| int32_t *dstData = reinterpret_cast<int32_t *>(dst->data_.get()); | ||
| std::vector<float> maxVals(inner); | ||
|
|
||
| for (size_t o = 0; o < outer; ++o) { | ||
| const float *srcSlab = srcData + o * axisDim * inner; | ||
| int32_t *dstRow = dstData + o * inner; | ||
|
|
||
| for (size_t i = 0; i < inner; ++i) { | ||
| maxVals[i] = -std::numeric_limits<float>::infinity(); | ||
| dstRow[i] = 0; | ||
| } | ||
|
|
||
| for (size_t d = 0; d < axisDim; ++d) { | ||
| const float *srcRow = srcSlab + d * inner; | ||
| for (size_t i = 0; i < inner; ++i) { | ||
| const float val = srcRow[i]; | ||
| if (val > maxVals[i]) { | ||
| maxVals[i] = val; | ||
| dstRow[i] = static_cast<int32_t>(d); | ||
| float maxVal = -std::numeric_limits<float>::infinity(); | ||
| int32_t maxIdx = 0; | ||
| for (size_t d = 0; d < axisDim; ++d) { | ||
| const float val = srcData[o * axisDim * inner + d * inner + i]; | ||
| if (val > maxVal) { | ||
| maxVal = val; | ||
| maxIdx = static_cast<int32_t>(d); | ||
| } | ||
| } | ||
| dstData[o * inner + i] = maxIdx; |
There was a problem hiding this comment.
Why do we eventually change this one? Didn't it give a performance gain?
There was a problem hiding this comment.
The change from #1264 caused a performance regression for the default axis=-1 case, as explained in the PR description.
There was a problem hiding this comment.
Maybe we can add a comment here so that it doesn't get wrongly "optimized" again in the future, then?
| import { validateModelSchema, SymbolicTensor } from '../../../core/modelSchema'; | ||
| import { wrapAsync } from '../../../core/runtime'; | ||
|
|
||
| import { type ImageBuffer } from '../image'; |
There was a problem hiding this comment.
| import { type ImageBuffer } from '../image'; | |
| import type { ImageBuffer } from '../image'; |
Description
`axis=-1` case as the internal loop was over contiguous elements. The linked PR changed it so that it was more efficient for the `axis=0` case; however, since the default is `axis=-1`, this caused a performance regression in the semantic segmentation task.
Introduces a breaking change?
Type of change
Tested on
Testing instructions
Screenshots
Related issues
Closes #1242
Checklist
Additional notes