diff --git a/apps/nlp/app.json b/apps/nlp/app.json
new file mode 100644
index 0000000000..929d222914
--- /dev/null
+++ b/apps/nlp/app.json
@@ -0,0 +1,45 @@
+{
+ "expo": {
+ "name": "nlp",
+ "slug": "nlp",
+ "version": "1.0.0",
+ "orientation": "portrait",
+ "icon": "./assets/icons/icon.png",
+ "userInterfaceStyle": "light",
+ "newArchEnabled": true,
+ "scheme": "rne-nlp",
+ "splash": {
+ "image": "./assets/icons/splash.png",
+ "resizeMode": "contain",
+ "backgroundColor": "#ffffff"
+ },
+ "ios": {
+ "supportsTablet": true,
+ "bundleIdentifier": "com.anonymous.nlp"
+ },
+ "android": {
+ "adaptiveIcon": {
+ "foregroundImage": "./assets/icons/adaptive-icon.png",
+ "backgroundColor": "#ffffff"
+ },
+ "package": "com.anonymous.nlp"
+ },
+ "web": {
+ "favicon": "./assets/icons/favicon.png"
+ },
+ "plugins": [
+ "expo-router",
+ [
+ "expo-build-properties",
+ {
+ "android": {
+ "minSdkVersion": 26
+ },
+ "ios": {
+ "deploymentTarget": "17.0"
+ }
+ }
+ ]
+ ]
+ }
+}
diff --git a/apps/nlp/app/_layout.tsx b/apps/nlp/app/_layout.tsx
new file mode 100644
index 0000000000..bdcfc39660
--- /dev/null
+++ b/apps/nlp/app/_layout.tsx
@@ -0,0 +1,32 @@
+import { Drawer } from 'expo-router/drawer';
+import { ColorPalette } from '../theme';
+import React from 'react';
+
+export default function Layout() {
+ return (
+
+ null,
+ title: 'Main Menu',
+ drawerItemStyle: { display: 'none' },
+ }}
+ />
+
+
+ );
+}
diff --git a/apps/nlp/app/index.tsx b/apps/nlp/app/index.tsx
new file mode 100644
index 0000000000..98ff59ab97
--- /dev/null
+++ b/apps/nlp/app/index.tsx
@@ -0,0 +1,51 @@
+import { useRouter } from 'expo-router';
+import { View, Text, StyleSheet, TouchableOpacity } from 'react-native';
+import { ColorPalette } from '../theme';
+import ExecutorchLogo from '../assets/icons/executorch.svg';
+
+export default function Home() {
+ const router = useRouter();
+
+ return (
+
+
+ Select a demo
+
+ router.navigate('tokenizer/')}>
+ Tokenizer
+
+
+
+ );
+}
+
+const styles = StyleSheet.create({
+ container: {
+ flex: 1,
+ justifyContent: 'center',
+ alignItems: 'center',
+ backgroundColor: '#fff',
+ },
+ headerText: {
+ fontSize: 18,
+ color: ColorPalette.strongPrimary,
+ margin: 20,
+ },
+ buttonContainer: {
+ width: '80%',
+ justifyContent: 'space-evenly',
+ marginBottom: 20,
+ },
+ button: {
+ backgroundColor: ColorPalette.strongPrimary,
+ borderRadius: 8,
+ padding: 14,
+ alignItems: 'center',
+ marginBottom: 12,
+ },
+ buttonText: {
+ color: 'white',
+ fontSize: 16,
+ fontWeight: '600',
+ },
+});
diff --git a/apps/nlp/app/tokenizer/index.tsx b/apps/nlp/app/tokenizer/index.tsx
new file mode 100644
index 0000000000..dbaf0ed3d0
--- /dev/null
+++ b/apps/nlp/app/tokenizer/index.tsx
@@ -0,0 +1,272 @@
+import React, { useEffect, useRef, useState } from 'react';
+import { View, Text, TextInput, ScrollView, StyleSheet } from 'react-native';
+import { useTokenizer, models } from 'react-native-executorch';
+import ScreenWrapper from '../../components/ScreenWrapper';
+import { ModelStatus } from '../../components/ModelStatus';
+import { Button } from '../../components/Button';
+import { theme } from '../../theme';
+
+type Check = { label: string; detail: string; pass: boolean };
+
+function TokenizerContent() {
+ const { isReady, downloadProgress, error, encode, decode, getVocabSize, idToToken, tokenToId } =
+ useTokenizer(models.tokenizer.ALL_MINILM_L6_V2);
+
+ const [text, setText] = useState('Hello world');
+ const [running, setRunning] = useState(false);
+ const [runError, setRunError] = useState(null);
+ const [ids, setIds] = useState(null);
+ const [roundTrip, setRoundTrip] = useState(null);
+ const [vocabSize, setVocabSize] = useState(null);
+ const [checks, setChecks] = useState([]);
+
+ const ready = isReady && encode && decode && getVocabSize && idToToken && tokenToId;
+
+  // Smoke test: encode/decode `text`, read the vocab size, check the id<->token inverse, log it.
+  const run = async () => {
+    if (!ready) return;
+    setRunning(true);
+    setRunError(null);
+    setIds(null);
+    setRoundTrip(null);
+    setVocabSize(null);
+    setChecks([]);
+    try {
+      const tokenIds = await encode(text);
+      const decoded = await decode(tokenIds, true);
+      const vocab = getVocabSize();
+
+      // An empty encoding has no token to sample; fail with a clear message instead of
+      // handing `undefined` to the native idToToken.
+      if (tokenIds.length === 0) throw new Error('encode returned no tokens');
+      // Inverse check on a token from the actual output (special tokens may be added by the tokenizer).
+      const sampleId = tokenIds[Math.min(1, tokenIds.length - 1)]!;
+      const sampleToken = idToToken(sampleId);
+      const sampleIdBack = tokenToId(sampleToken);
+      // all-MiniLM-L6-v2 is an uncased BERT WordPiece tokenizer: expect the trimmed, lower-cased input.
+      const expected = text.trim().toLowerCase();
+
+      const nextChecks: Check[] = [
+        {
+          label: 'Round-trip decode(encode(text))',
+          detail: `"${decoded}" vs "${expected}"`,
+          pass: decoded.trim() === expected,
+        },
+        {
+          label: 'Vocabulary size',
+          detail: `${vocab} (expected 30522 for bert-base-uncased)`,
+          pass: vocab === 30522,
+        },
+        {
+          label: 'Inverse tokenToId(idToToken(id))',
+          detail: `${sampleId} → "${sampleToken}" → ${sampleIdBack}`,
+          pass: sampleIdBack === sampleId,
+        },
+      ];
+
+      setIds(tokenIds);
+      setRoundTrip(decoded);
+      setVocabSize(vocab);
+      setChecks(nextChecks);
+
+      // Structured log so the result is verifiable from device/Metro logs.
+      const summary = {
+        allPass: nextChecks.every((c) => c.pass),
+        input: text, ids: tokenIds, decoded, vocab,
+        checks: nextChecks.map(({ label, pass, detail }) => ({ label, pass, detail })),
+      };
+      console.log('[TokenizerTest]', JSON.stringify(summary));
+    } catch (e: unknown) {
+      const message = e instanceof Error ? e.message : String(e);
+      console.log('[TokenizerTest] ERROR', message);
+      setRunError(message);
+    } finally {
+      setRunning(false);
+    }
+  };
+
+ // Auto-run once as soon as the tokenizer is ready, so the demo doubles as a
+ // self-checking smoke test (results logged under "[TokenizerTest]").
+ const autoRan = useRef(false);
+ useEffect(() => {
+ if (ready && !autoRan.current) {
+ autoRan.current = true;
+ run();
+ }
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [ready]);
+
+ return (
+
+
+ Tokenizer
+
+ Loads the all-MiniLM-L6-v2 tokenizer and proves encode / decode / getVocabSize / idToToken
+ / tokenToId work end-to-end against the native HFTokenizer.
+
+
+
+
+
+
+
+
+
+
+
+ {runError && (
+
+ {runError}
+
+ )}
+
+ {ids && (
+
+ Results
+
+ Token IDs ({ids.length})
+ [{ids.join(', ')}]
+
+ Decoded (skipSpecialTokens)
+ {roundTrip}
+
+ Vocab size
+ {vocabSize}
+
+ Assertions
+ {checks.map((c, i) => (
+
+
+ {c.pass ? 'PASS' : 'FAIL'}
+
+
+ {c.label}
+ {c.detail}
+
+
+ ))}
+
+ )}
+
+ );
+}
+
+export default function TokenizerScreen() {
+ return (
+
+
+
+ );
+}
+
+const styles = StyleSheet.create({
+ container: { flex: 1, backgroundColor: theme.colors.background },
+ content: { padding: theme.spacing.large, paddingBottom: 40 },
+ card: {
+ backgroundColor: theme.colors.cardBackground,
+ borderRadius: theme.radius.large,
+ padding: 20,
+ marginBottom: 20,
+ borderWidth: 1,
+ borderColor: theme.colors.lightBorder,
+ },
+ cardTitle: {
+ fontSize: theme.typography.title.fontSize,
+ fontWeight: theme.typography.title.fontWeight,
+ color: theme.colors.strongPrimary,
+ marginBottom: 8,
+ },
+ cardDescription: {
+ fontSize: 14,
+ color: theme.colors.textMuted,
+ lineHeight: 20,
+ marginBottom: 16,
+ },
+ input: {
+ backgroundColor: '#f1f3f5',
+ borderRadius: theme.radius.small,
+ padding: 12,
+ fontSize: 15,
+ color: '#212529',
+ marginBottom: 16,
+ borderWidth: 1,
+ borderColor: theme.colors.lightBorder,
+ },
+ buttonRow: { flexDirection: 'row', gap: theme.spacing.small },
+ errorContainer: {
+ backgroundColor: theme.colors.errorBackground,
+ padding: 12,
+ borderRadius: theme.radius.small,
+ marginBottom: 16,
+ },
+ errorText: { color: theme.colors.errorText, fontSize: 14, textAlign: 'center' },
+ resultsCard: {
+ backgroundColor: theme.colors.cardBackground,
+ borderRadius: theme.radius.large,
+ padding: 20,
+ borderWidth: 1,
+ borderColor: theme.colors.lightBorder,
+ },
+ resultsHeader: {
+ fontSize: 18,
+ fontWeight: '700',
+ color: '#212529',
+ marginBottom: 16,
+ borderBottomWidth: 1,
+ borderBottomColor: '#f1f3f5',
+ paddingBottom: 10,
+ },
+ fieldLabel: {
+ fontSize: 12,
+ fontWeight: '600',
+ color: theme.colors.textPlaceholder,
+ textTransform: 'uppercase',
+ letterSpacing: 0.5,
+ marginTop: 12,
+ marginBottom: 4,
+ },
+ mono: {
+ fontSize: 13,
+ color: '#495057',
+ backgroundColor: '#f8f9fa',
+ padding: 8,
+ borderRadius: 6,
+ borderWidth: 1,
+ borderColor: theme.colors.lightBorder,
+ },
+ checksHeader: {
+ fontSize: 16,
+ fontWeight: '700',
+ color: '#212529',
+ marginTop: 20,
+ marginBottom: 10,
+ },
+ checkRow: { flexDirection: 'row', alignItems: 'flex-start', gap: 10, marginBottom: 10 },
+ checkBadge: {
+ fontSize: 11,
+ fontWeight: '700',
+ color: '#fff',
+ paddingHorizontal: 8,
+ paddingVertical: 4,
+ borderRadius: 4,
+ overflow: 'hidden',
+ },
+ pass: { backgroundColor: '#2b8a3e' },
+ fail: { backgroundColor: theme.colors.errorText },
+ checkBody: { flex: 1 },
+ checkLabel: { fontSize: 14, fontWeight: '600', color: '#212529' },
+ checkDetail: { fontSize: 12, color: theme.colors.textPlaceholder, marginTop: 2 },
+});
diff --git a/apps/nlp/assets/icons/adaptive-icon.png b/apps/nlp/assets/icons/adaptive-icon.png
new file mode 100644
index 0000000000..03d6f6b6c6
Binary files /dev/null and b/apps/nlp/assets/icons/adaptive-icon.png differ
diff --git a/apps/nlp/assets/icons/executorch.svg b/apps/nlp/assets/icons/executorch.svg
new file mode 100644
index 0000000000..e548ea4201
--- /dev/null
+++ b/apps/nlp/assets/icons/executorch.svg
@@ -0,0 +1,9 @@
+
diff --git a/apps/nlp/assets/icons/favicon.png b/apps/nlp/assets/icons/favicon.png
new file mode 100644
index 0000000000..e75f697b18
Binary files /dev/null and b/apps/nlp/assets/icons/favicon.png differ
diff --git a/apps/nlp/assets/icons/icon.png b/apps/nlp/assets/icons/icon.png
new file mode 100644
index 0000000000..a0b1526fc7
Binary files /dev/null and b/apps/nlp/assets/icons/icon.png differ
diff --git a/apps/nlp/assets/icons/splash.png b/apps/nlp/assets/icons/splash.png
new file mode 100644
index 0000000000..0e89705a94
Binary files /dev/null and b/apps/nlp/assets/icons/splash.png differ
diff --git a/apps/nlp/babel.config.js b/apps/nlp/babel.config.js
new file mode 100644
index 0000000000..6b2006979c
--- /dev/null
+++ b/apps/nlp/babel.config.js
@@ -0,0 +1,7 @@
+// Babel configuration: Expo preset plus the worklets plugin; `api.cache(true)` enables config caching.
+module.exports = (api) => {
+  api.cache(true);
+  const presets = ['babel-preset-expo'];
+  const plugins = ['react-native-worklets/plugin'];
+  return { presets, plugins };
+};
diff --git a/apps/nlp/components/Button.tsx b/apps/nlp/components/Button.tsx
new file mode 100644
index 0000000000..6c4c5d8081
--- /dev/null
+++ b/apps/nlp/components/Button.tsx
@@ -0,0 +1,102 @@
+import React from 'react';
+import {
+ TouchableOpacity,
+ Text,
+ ActivityIndicator,
+ StyleSheet,
+ type StyleProp,
+ type ViewStyle,
+} from 'react-native';
+import { theme } from '../theme';
+
+export interface ButtonProps {
+ title: string;
+ onPress: () => void;
+ variant?: 'primary' | 'secondary' | 'accent';
+ disabled?: boolean;
+ loading?: boolean;
+ style?: StyleProp;
+}
+
+export function Button({
+ title,
+ onPress,
+ variant = 'primary',
+ disabled = false,
+ loading = false,
+ style,
+}: ButtonProps) {
+ const buttonStyles = [
+ styles.button,
+ variant === 'primary' && styles.primary,
+ variant === 'secondary' && styles.secondary,
+ variant === 'accent' && styles.accent,
+ disabled && styles.disabled,
+ style,
+ ];
+
+ const textStyles = [
+ styles.text,
+ variant === 'primary' && styles.textPrimary,
+ variant === 'secondary' && styles.textSecondary,
+ variant === 'accent' && styles.textPrimary,
+ disabled && styles.textDisabled,
+ ];
+
+ return (
+
+ {loading ? (
+
+ ) : (
+ {title}
+ )}
+
+ );
+}
+
+const styles = StyleSheet.create({
+ button: {
+ flex: 1,
+ paddingVertical: 14,
+ borderRadius: theme.radius.medium,
+ alignItems: 'center',
+ justifyContent: 'center',
+ },
+ primary: {
+ backgroundColor: theme.colors.strongPrimary,
+ },
+ secondary: {
+ backgroundColor: theme.colors.secondary,
+ borderColor: theme.colors.strongPrimary,
+ borderWidth: 1.5,
+ },
+ accent: {
+ backgroundColor: theme.colors.accent,
+ },
+ disabled: {
+ backgroundColor: '#aaa',
+ borderColor: '#aaa',
+ opacity: 0.6,
+ },
+ text: {
+ fontSize: 15,
+ fontWeight: '600',
+ },
+ textPrimary: {
+ color: theme.colors.textPrimary,
+ },
+ textSecondary: {
+ color: theme.colors.textSecondary,
+ },
+ textDisabled: {
+ color: '#666',
+ },
+});
diff --git a/apps/nlp/components/ModelStatus.tsx b/apps/nlp/components/ModelStatus.tsx
new file mode 100644
index 0000000000..0234ac53f8
--- /dev/null
+++ b/apps/nlp/components/ModelStatus.tsx
@@ -0,0 +1,69 @@
+import React from 'react';
+import { View, Text, ActivityIndicator, StyleSheet } from 'react-native';
+import { theme } from '../theme';
+
+export interface ModelStatusProps {
+ isReady: boolean;
+ downloadProgress?: number | null;
+ error?: string | null;
+ modelTypeLabel?: string;
+}
+
+export function ModelStatus({
+ isReady,
+ downloadProgress,
+ error,
+ modelTypeLabel = 'model',
+}: ModelStatusProps) {
+ if (error) {
+ return (
+
+ {error}
+
+ );
+ }
+
+ if (!isReady) {
+ return (
+
+
+
+ Downloading {modelTypeLabel}...{' '}
+ {downloadProgress ? `${Math.round(downloadProgress)}%` : '0%'}
+
+
+ );
+ }
+
+ return null;
+}
+
+const styles = StyleSheet.create({
+ statusBox: {
+ flexDirection: 'row',
+ alignItems: 'center',
+ backgroundColor: '#ffe8d6',
+ paddingHorizontal: 16,
+ paddingVertical: 10,
+ borderRadius: theme.radius.small,
+ marginBottom: 16,
+ width: '100%',
+ },
+ statusIndicator: {
+ marginRight: 8,
+ },
+ statusText: { fontSize: 13, color: '#a0522d', fontWeight: '500' },
+ errorContainer: {
+ backgroundColor: theme.colors.errorBackground,
+ padding: 12,
+ borderRadius: theme.radius.small,
+ marginVertical: 8,
+ alignSelf: 'stretch',
+ marginBottom: 16,
+ },
+ errorText: {
+ color: theme.colors.errorText,
+ fontSize: 14,
+ textAlign: 'center',
+ },
+});
diff --git a/apps/nlp/components/ScreenWrapper.tsx b/apps/nlp/components/ScreenWrapper.tsx
new file mode 100644
index 0000000000..31f70e4442
--- /dev/null
+++ b/apps/nlp/components/ScreenWrapper.tsx
@@ -0,0 +1,8 @@
+import { useIsFocused } from 'expo-router';
+import { PropsWithChildren } from 'react';
+
+export default function ScreenWrapper({ children }: PropsWithChildren) {
+ const isFocused = useIsFocused();
+
+ return isFocused ? <>{children}> : null;
+}
diff --git a/apps/nlp/declarations.d.ts b/apps/nlp/declarations.d.ts
new file mode 100644
index 0000000000..85e178f497
--- /dev/null
+++ b/apps/nlp/declarations.d.ts
@@ -0,0 +1,5 @@
+declare module '*.svg' {
+ import { SvgProps } from 'react-native-svg';
+ const content: React.FV;
+ export default content;
+}
diff --git a/apps/nlp/index.ts b/apps/nlp/index.ts
new file mode 100644
index 0000000000..3f443dcf95
--- /dev/null
+++ b/apps/nlp/index.ts
@@ -0,0 +1,8 @@
+import { registerRootComponent } from 'expo';
+
+import App from './app';
+
+// registerRootComponent calls AppRegistry.registerComponent('main', () => App);
+// It also ensures that whether you load the app in Expo Go or in a native build,
+// the environment is set up appropriately
+registerRootComponent(App);
diff --git a/apps/nlp/metro.config.js b/apps/nlp/metro.config.js
new file mode 100644
index 0000000000..f8ab2ab96d
--- /dev/null
+++ b/apps/nlp/metro.config.js
@@ -0,0 +1,21 @@
+// Learn more https://docs.expo.io/guides/customizing-metro
+const { getDefaultConfig } = require('expo/metro-config');
+
+/** @type {import('expo/metro-config').MetroConfig} */
+const config = getDefaultConfig(__dirname);
+
+// Hand source transformation to react-native-svg-transformer so .svg files compile as modules.
+const svgTransformerPath = require.resolve('react-native-svg-transformer/expo');
+config.transformer = { ...config.transformer, babelTransformerPath: svgTransformerPath };
+
+// Move 'svg' from the asset extensions to the source extensions and register 'pte' as an asset.
+const defaultResolver = config.resolver;
+const assetExts = defaultResolver.assetExts.filter((ext) => ext !== 'svg');
+assetExts.push('pte');
+config.resolver = {
+  ...defaultResolver,
+  assetExts,
+  sourceExts: [...defaultResolver.sourceExts, 'svg'],
+};
+
+module.exports = config;
diff --git a/apps/nlp/package.json b/apps/nlp/package.json
new file mode 100644
index 0000000000..973f442349
--- /dev/null
+++ b/apps/nlp/package.json
@@ -0,0 +1,42 @@
+{
+ "name": "nlp",
+ "version": "1.0.0",
+ "main": "expo-router/entry",
+ "scripts": {
+ "start": "expo start",
+ "android": "expo run:android",
+ "ios": "expo run:ios",
+ "web": "expo start --web",
+ "typecheck": "tsc",
+ "lint": "eslint . --ext .ts,.tsx --fix",
+ "postinstall": "yarn run -T patch-package --patch-dir ../../patches"
+ },
+ "dependencies": {
+ "@react-navigation/drawer": "^7.9.4",
+ "@react-navigation/native": "^7.2.2",
+ "expo": "~56.0.9",
+ "expo-build-properties": "~56.0.17",
+ "expo-constants": "~56.0.17",
+ "expo-linking": "~56.0.13",
+ "expo-router": "~56.2.9",
+ "react": "19.2.3",
+ "react-native": "0.85.3",
+ "react-native-drawer-layout": "^4.2.2",
+ "react-native-executorch": "workspace:*",
+ "react-native-gesture-handler": "~2.31.1",
+ "react-native-reanimated": "4.4.0",
+ "react-native-safe-area-context": "~5.7.0",
+ "react-native-screens": "~4.25.2",
+ "react-native-svg": "15.15.4",
+ "react-native-worklets": "0.9.1"
+ },
+ "devDependencies": {
+ "@babel/core": "^7.29.0",
+ "@react-native/metro-config": "^0.86.0",
+ "@types/react": "~19.2.0",
+ "babel-preset-expo": "~56.0.14",
+ "react-native-svg-transformer": "^1.5.3",
+ "react-refresh": "^0.14.0"
+ },
+ "private": true
+}
diff --git a/apps/nlp/theme.ts b/apps/nlp/theme.ts
new file mode 100644
index 0000000000..765ed34ae1
--- /dev/null
+++ b/apps/nlp/theme.ts
@@ -0,0 +1,76 @@
+import { StyleSheet } from 'react-native';
+
+export const ColorPalette = {
+ primary: '#001A72',
+ strongPrimary: '#020F3C',
+};
+
+/**
+ * Design tokens for the demo app, grouped into colors, radii, spacing and typography.
+ * Each group is declared on its own and then assembled into the exported `theme`.
+ */
+const colors = {
+  // The two brand colors come from ColorPalette; the remaining entries are literals.
+  primary: ColorPalette.primary,
+  strongPrimary: ColorPalette.strongPrimary,
+  secondary: '#ffffff',
+  accent: '#1a73e8',
+  background: '#f5f5f5',
+  cardBackground: '#ffffff',
+  placeholderBackground: '#eaeaea',
+  // Borders
+  border: '#ccc',
+  lightBorder: '#e9ecef',
+  // Error state
+  errorBackground: '#ffe3e3',
+  errorText: '#d63031',
+  // Text
+  textPrimary: '#ffffff',
+  textSecondary: '#000000',
+  textMuted: '#666666',
+  textPlaceholder: '#868e96',
+};
+
+// Corner-radius steps.
+const radius = { small: 8, medium: 12, large: 16 };
+
+// Spacing steps used for padding, margins and gaps.
+const spacing = { small: 8, medium: 12, large: 16 };
+
+// Text presets; `as const` keeps `fontWeight` typed as the literal '700' instead of `string`.
+const typography = {
+  title: { fontSize: 22, fontWeight: '700' as const },
+  body: { fontSize: 14, color: '#333333' },
+};
+
+export const theme = { colors, radius, spacing, typography };
+
+export const commonStyles = StyleSheet.create({
+ container: {
+ flex: 1,
+ backgroundColor: theme.colors.background,
+ },
+ contentContainer: {
+ padding: theme.spacing.large,
+ alignItems: 'center',
+ },
+ title: {
+ fontSize: theme.typography.title.fontSize,
+ fontWeight: theme.typography.title.fontWeight,
+ color: theme.colors.strongPrimary,
+ marginBottom: theme.spacing.large,
+ },
+ buttonRow: {
+ flexDirection: 'row',
+ gap: theme.spacing.small,
+ width: '100%',
+ marginBottom: theme.spacing.large,
+ },
+ description: {
+ fontSize: theme.typography.body.fontSize,
+ color: theme.colors.textMuted,
+ textAlign: 'center',
+ marginBottom: theme.spacing.large,
+ paddingHorizontal: theme.spacing.medium,
+ },
+});
diff --git a/apps/nlp/tsconfig.json b/apps/nlp/tsconfig.json
new file mode 100644
index 0000000000..6f2f29b5b3
--- /dev/null
+++ b/apps/nlp/tsconfig.json
@@ -0,0 +1,16 @@
+{
+ "extends": "../../tsconfig.json",
+ "compilerOptions": {
+ "strict": true,
+ "allowJs": true,
+ "module": "preserve",
+ "moduleDetection": "force",
+ "moduleResolution": "bundler",
+ "customConditions": ["react-native"],
+ "noEmit": true,
+ "paths": {
+ "react-native-executorch": ["../../packages/react-native-executorch/src"],
+ "react-native-executorch-expo-resource-fetcher": ["../../packages/expo-resource-fetcher/src"]
+ }
+ }
+}
diff --git a/packages/react-native-executorch/android/CMakeLists.txt b/packages/react-native-executorch/android/CMakeLists.txt
index 14a06d7546..75006a676a 100644
--- a/packages/react-native-executorch/android/CMakeLists.txt
+++ b/packages/react-native-executorch/android/CMakeLists.txt
@@ -18,12 +18,14 @@ find_package(ReactAndroid REQUIRED CONFIG)
file(GLOB CORE_SOURCES ../cpp/core/*.cpp)
file(GLOB MATH_SOURCES ../cpp/extensions/math/*.cpp)
file(GLOB CV_SOURCES ../cpp/extensions/cv/*.cpp)
+file(GLOB NLP_SOURCES ../cpp/extensions/nlp/*.cpp)
add_library(${CMAKE_PROJECT_NAME} SHARED
../cpp/RnExecutorch.cpp
${CORE_SOURCES}
${CV_SOURCES}
${MATH_SOURCES}
+ ${NLP_SOURCES}
cpp-adapter.cpp
)
@@ -35,6 +37,10 @@ target_compile_definitions(${CMAKE_PROJECT_NAME} PRIVATE
target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE
../cpp
${CMAKE_CURRENT_SOURCE_DIR}/../third-party/include
+ ${CMAKE_CURRENT_SOURCE_DIR}/../third-party/include/executorch/extension/llm/tokenizers/include
+ ${CMAKE_CURRENT_SOURCE_DIR}/../third-party/include/executorch/extension/llm/tokenizers/third-party/json/include
+ ${CMAKE_CURRENT_SOURCE_DIR}/../third-party/include/executorch/extension/llm/tokenizers/third-party/re2
+ ${CMAKE_CURRENT_SOURCE_DIR}/../third-party/include/executorch/extension/llm/tokenizers/third-party/abseil-cpp
)
# Ensure executorch is linked with WHOLE_ARCHIVE so all static initializers are included
diff --git a/packages/react-native-executorch/cpp/RnExecutorch.cpp b/packages/react-native-executorch/cpp/RnExecutorch.cpp
index e9724e5820..3b71b61775 100644
--- a/packages/react-native-executorch/cpp/RnExecutorch.cpp
+++ b/packages/react-native-executorch/cpp/RnExecutorch.cpp
@@ -3,6 +3,7 @@
#include "core/install.h"
#include "extensions/cv/install.h"
#include "extensions/math/install.h"
+#include "extensions/nlp/install.h"
using namespace facebook;
@@ -13,6 +14,7 @@ void install(jsi::Runtime &jsiRuntime) {
rnexecutorch::core::install(jsiRuntime, module);
rnexecutorch::extensions::cv::install(jsiRuntime, module);
rnexecutorch::extensions::math::install(jsiRuntime, module);
+ rnexecutorch::extensions::nlp::install(jsiRuntime, module);
jsiRuntime.global().setProperty(jsiRuntime, "__rnexecutorch_jsi__", std::move(module));
}
diff --git a/packages/react-native-executorch/cpp/extensions/nlp/install.cpp b/packages/react-native-executorch/cpp/extensions/nlp/install.cpp
new file mode 100644
index 0000000000..39802b5a78
--- /dev/null
+++ b/packages/react-native-executorch/cpp/extensions/nlp/install.cpp
@@ -0,0 +1,14 @@
+#include "install.h"
+#include "tokenizer.h"
+
+namespace rnexecutorch::extensions::nlp {
+namespace jsi = facebook::jsi;
+
+// Creates the `nlp` namespace object, registers its functions on it and attaches it to `module`.
+void install(facebook::jsi::Runtime &rt, facebook::jsi::Object &module) {
+  auto nlpNamespace = jsi::Object(rt);
+  // loadTokenizer is the only function registered under `nlp` here.
+  tokenizer::install_loadTokenizer(rt, nlpNamespace);
+  module.setProperty(rt, "nlp", nlpNamespace);
+}
+} // namespace rnexecutorch::extensions::nlp
diff --git a/packages/react-native-executorch/cpp/extensions/nlp/install.h b/packages/react-native-executorch/cpp/extensions/nlp/install.h
new file mode 100644
index 0000000000..e2afefd727
--- /dev/null
+++ b/packages/react-native-executorch/cpp/extensions/nlp/install.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#include
+
+namespace rnexecutorch::extensions::nlp {
+void install(facebook::jsi::Runtime &rt, facebook::jsi::Object &module);
+} // namespace rnexecutorch::extensions::nlp
diff --git a/packages/react-native-executorch/cpp/extensions/nlp/tokenizer.cpp b/packages/react-native-executorch/cpp/extensions/nlp/tokenizer.cpp
new file mode 100644
index 0000000000..a313b4df37
--- /dev/null
+++ b/packages/react-native-executorch/cpp/extensions/nlp/tokenizer.cpp
@@ -0,0 +1,301 @@
+#include "tokenizer.h"
+
+#include
+#include
+
+#include
+
+namespace rnexecutorch::extensions::nlp::tokenizer {
+namespace jsi = facebook::jsi;
+
+namespace {
+// Number of BOS/EOS tokens to add on top of what the tokenizer.json defines.
+// Keeping these at 0 means encoding follows the tokenizer's own post_processor
+// (i.e. special tokens are added exactly as configured in tokenizer.json).
+constexpr uint64_t kNumAddedBosTokens = 0;
+constexpr uint64_t kNumAddedEosTokens = 0;
+
+// tokenizers::Error is its own enum (not executorch::runtime::Error), and the
+// tokenizers library ships no to_string for it, so map it to a readable name.
+std::string toString(tokenizers::Error error) {
+ switch (error) {
+ case tokenizers::Error::Ok:
+ return "Ok";
+ case tokenizers::Error::Internal:
+ return "Internal";
+ case tokenizers::Error::Uninitialized:
+ return "Uninitialized";
+ case tokenizers::Error::OutOfRange:
+ return "OutOfRange";
+ case tokenizers::Error::LoadFailure:
+ return "LoadFailure";
+ case tokenizers::Error::EncodeFailure:
+ return "EncodeFailure";
+ case tokenizers::Error::Base64DecodeFailure:
+ return "Base64DecodeFailure";
+ case tokenizers::Error::ParseFailure:
+ return "ParseFailure";
+ case tokenizers::Error::DecodeFailure:
+ return "DecodeFailure";
+ case tokenizers::Error::RegexFailure:
+ return "RegexFailure";
+ }
+ return "Unknown(" + std::to_string(static_cast(error)) + ")";
+}
+} // namespace
+
+TokenizerHostObject::TokenizerHostObject(const std::string &tokenizerPath)
+ : tokenizerPath_(tokenizerPath),
+ tokenizer_(std::make_unique()) {
+ auto error = tokenizer_->load(tokenizerPath_);
+ if (error != tokenizers::Error::Ok) {
+ throw std::runtime_error("Failed to load tokenizer from '" + tokenizerPath_ +
+ "': " + toString(error));
+ }
+}
+
+jsi::Value TokenizerHostObject::get(jsi::Runtime &rt, const jsi::PropNameID &name) {
+ auto nameStr = name.utf8(rt);
+
+ if (nameStr == "path") {
+ return jsi::String::createFromUtf8(rt, tokenizerPath_);
+ }
+
+ if (nameStr == "encode") {
+ auto self = shared_from_this();
+ auto fnBody = [self](jsi::Runtime &rt, const jsi::Value &thisVal, const jsi::Value *args, size_t count) -> jsi::Value {
+ if (count != 1) {
+ throw jsi::JSError(rt, "encode: Usage: encode(text)");
+ }
+
+ if (!args[0].isString()) {
+ throw jsi::JSError(rt, "encode: Expected arg0 to be a string");
+ }
+
+ std::unique_lock lock(self->mutex_, std::try_to_lock);
+ if (!lock.owns_lock()) {
+ throw jsi::JSError(rt, "encode: Tokenizer is currently in use");
+ }
+
+ if (!self->tokenizer_) {
+ throw jsi::JSError(rt, "encode: Tokenizer has been disposed");
+ }
+
+ auto text = args[0].asString(rt).utf8(rt);
+ auto result = self->tokenizer_->encode(text, kNumAddedBosTokens, kNumAddedEosTokens);
+ if (!result.ok()) {
+ throw jsi::JSError(rt, "encode: Failed to encode input: " +
+ toString(result.error()));
+ }
+
+ const auto &ids = result.get();
+ auto jsArray = jsi::Array(rt, ids.size());
+ for (size_t i = 0; i < ids.size(); ++i) {
+ jsArray.setValueAtIndex(rt, i, static_cast(ids[i]));
+ }
+
+ return jsArray;
+ };
+ return jsi::Function::createFromHostFunction(rt, jsi::PropNameID::forAscii(rt, "encode"), 1, fnBody);
+ }
+
+ if (nameStr == "decode") { // JS: decode(tokens, skipSpecialTokens?) -> string
+ auto self = shared_from_this(); // captured by the lambda so the host object outlives the returned function
+ auto fnBody = [self](jsi::Runtime &rt, const jsi::Value &thisVal, const jsi::Value *args, size_t count) -> jsi::Value {
+ if (count < 1 || count > 2) {
+ throw jsi::JSError(rt, "decode: Usage: decode(tokens, skipSpecialTokens?)");
+ }
+
+ if (!args[0].isObject() || !args[0].asObject(rt).isArray(rt)) {
+ throw jsi::JSError(rt, "decode: Expected arg0 to be an array");
+ }
+
+ // skipSpecialTokens is optional and defaults to true.
+ bool skipSpecialTokens = true;
+ if (count == 2 && !args[1].isUndefined()) { // an explicit `undefined` is treated like an omitted argument
+ if (!args[1].isBool()) {
+ throw jsi::JSError(rt, "decode: Expected arg1 to be a boolean");
+ }
+ skipSpecialTokens = args[1].asBool();
+ }
+
+ std::unique_lock lock(self->mutex_, std::try_to_lock); // non-blocking: a busy tokenizer raises an error instead of waiting
+ if (!lock.owns_lock()) {
+ throw jsi::JSError(rt, "decode: Tokenizer is currently in use");
+ }
+
+ if (!self->tokenizer_) { // null once dispose() has reset the pointer
+ throw jsi::JSError(rt, "decode: Tokenizer has been disposed");
+ }
+
+ auto tokensArray = args[0].asObject(rt).asArray(rt);
+
+ std::vector tokens;
+ tokens.reserve(tokensArray.size(rt));
+ for (size_t i = 0; i < tokensArray.size(rt); ++i) {
+ auto val = tokensArray.getValueAtIndex(rt, i);
+ if (!val.isNumber()) { // checks the JS type only; the numeric value itself is not validated
+ throw jsi::JSError(rt, "decode: Expected tokens[" + std::to_string(i) + "] to be a number");
+ }
+ tokens.push_back(static_cast(val.asNumber())); // NOTE(review): negative, fractional or NaN values reach this cast unchecked; cast target type not visible here - confirm this is safe
+ }
+
+ if (tokens.empty()) { // empty input returns "" without calling the tokenizer
+ return jsi::String::createFromUtf8(rt, "");
+ }
+
+ auto result = self->tokenizer_->decode(tokens, skipSpecialTokens);
+ if (!result.ok()) {
+ throw jsi::JSError(rt, "decode: Failed to decode tokens: " +
+ toString(result.error()));
+ }
+
+ return jsi::String::createFromUtf8(rt, result.get());
+ };
+ return jsi::Function::createFromHostFunction(rt, jsi::PropNameID::forAscii(rt, "decode"), 1, fnBody); // paramCount 1 = the single required argument
+ }
+
+ if (nameStr == "getVocabSize") {
+ auto self = shared_from_this();
+ auto fnBody = [self](jsi::Runtime &rt, const jsi::Value &thisVal, const jsi::Value *args, size_t count) -> jsi::Value {
+ if (count != 0) {
+ throw jsi::JSError(rt, "getVocabSize: Usage: getVocabSize()");
+ }
+
+ std::unique_lock lock(self->mutex_, std::try_to_lock);
+ if (!lock.owns_lock()) {
+ throw jsi::JSError(rt, "getVocabSize: Tokenizer is currently in use");
+ }
+
+ if (!self->tokenizer_) {
+ throw jsi::JSError(rt, "getVocabSize: Tokenizer has been disposed");
+ }
+
+ return static_cast(self->tokenizer_->vocab_size());
+ };
+ return jsi::Function::createFromHostFunction(rt, jsi::PropNameID::forAscii(rt, "getVocabSize"), 0, fnBody);
+ }
+
+ if (nameStr == "idToToken") { // JS: idToToken(id) -> string
+ auto self = shared_from_this(); // captured by the lambda so the host object outlives the returned function
+ auto fnBody = [self](jsi::Runtime &rt, const jsi::Value &thisVal, const jsi::Value *args, size_t count) -> jsi::Value {
+ if (count != 1) {
+ throw jsi::JSError(rt, "idToToken: Usage: idToToken(id)");
+ }
+
+ if (!args[0].isNumber()) { // checks the JS type only; the numeric value itself is not validated
+ throw jsi::JSError(rt, "idToToken: Expected arg0 to be a number");
+ }
+
+ std::unique_lock lock(self->mutex_, std::try_to_lock); // non-blocking: a busy tokenizer raises an error instead of waiting
+ if (!lock.owns_lock()) {
+ throw jsi::JSError(rt, "idToToken: Tokenizer is currently in use");
+ }
+
+ if (!self->tokenizer_) { // null once dispose() has reset the pointer
+ throw jsi::JSError(rt, "idToToken: Tokenizer has been disposed");
+ }
+
+ auto tokenId = static_cast(args[0].asNumber()); // NOTE(review): negative, fractional or NaN ids reach this cast unchecked; cast target type not visible here - confirm this is safe
+ auto result = self->tokenizer_->id_to_piece(tokenId);
+ if (!result.ok()) { // lookup failures surface as a JS error carrying the tokenizer error name
+ throw jsi::JSError(rt, "idToToken: Failed to convert id to token: " +
+ toString(result.error()));
+ }
+
+ return jsi::String::createFromUtf8(rt, result.get());
+ };
+ return jsi::Function::createFromHostFunction(rt, jsi::PropNameID::forAscii(rt, "idToToken"), 1, fnBody);
+ }
+
+ if (nameStr == "tokenToId") {
+ auto self = shared_from_this();
+ auto fnBody = [self](jsi::Runtime &rt, const jsi::Value &thisVal, const jsi::Value *args, size_t count) -> jsi::Value {
+ if (count != 1) {
+ throw jsi::JSError(rt, "tokenToId: Usage: tokenToId(token)");
+ }
+
+ if (!args[0].isString()) {
+ throw jsi::JSError(rt, "tokenToId: Expected arg0 to be a string");
+ }
+
+ std::unique_lock lock(self->mutex_, std::try_to_lock);
+ if (!lock.owns_lock()) {
+ throw jsi::JSError(rt, "tokenToId: Tokenizer is currently in use");
+ }
+
+ if (!self->tokenizer_) {
+ throw jsi::JSError(rt, "tokenToId: Tokenizer has been disposed");
+ }
+
+ auto token = args[0].asString(rt).utf8(rt);
+ auto result = self->tokenizer_->piece_to_id(token);
+ if (!result.ok()) {
+ throw jsi::JSError(rt, "tokenToId: Failed to convert token to id: " +
+ toString(result.error()));
+ }
+
+ return static_cast(result.get());
+ };
+ return jsi::Function::createFromHostFunction(rt, jsi::PropNameID::forAscii(rt, "tokenToId"), 1, fnBody);
+ }
+
+ if (nameStr == "dispose") { // JS: dispose() -> undefined; releases the native tokenizer
+ auto self = shared_from_this(); // captured by the lambda so the host object outlives the returned function
+ auto fnBody = [self](jsi::Runtime &rt, const jsi::Value &thisVal, const jsi::Value *args, size_t count) -> jsi::Value {
+ if (count != 0) {
+ throw jsi::JSError(rt, "dispose: Usage: dispose()");
+ }
+
+ std::unique_lock lock(self->mutex_); // blocking acquire (no try_to_lock): waits for the mutex instead of failing
+
+ if (!self->tokenizer_) { // a second dispose() is reported as an error rather than ignored
+ throw jsi::JSError(rt, "dispose: Tokenizer has already been disposed");
+ }
+
+ self->tokenizer_.reset(); // destroys the tokenizer; the host object itself stays alive with a null pointer
+
+ return jsi::Value::undefined();
+ };
+ return jsi::Function::createFromHostFunction(rt, jsi::PropNameID::forAscii(rt, "dispose"), 0, fnBody);
+ }
+
+ return jsi::Value::undefined();
+}
+
+std::vector TokenizerHostObject::getPropertyNames(jsi::Runtime &rt) {
+ std::vector properties;
+ properties.push_back(jsi::PropNameID::forAscii(rt, "path"));
+ properties.push_back(jsi::PropNameID::forAscii(rt, "encode"));
+ properties.push_back(jsi::PropNameID::forAscii(rt, "decode"));
+ properties.push_back(jsi::PropNameID::forAscii(rt, "getVocabSize"));
+ properties.push_back(jsi::PropNameID::forAscii(rt, "idToToken"));
+ properties.push_back(jsi::PropNameID::forAscii(rt, "tokenToId"));
+ properties.push_back(jsi::PropNameID::forAscii(rt, "dispose"));
+ return properties;
+}
+
+void install_loadTokenizer(jsi::Runtime &rt, jsi::Object &module) {
+ auto name = "loadTokenizer";
+ auto fnBody = [](jsi::Runtime &rt, const jsi::Value &thisVal, const jsi::Value *args, size_t count) -> jsi::Value {
+ if (count != 1) {
+ throw jsi::JSError(rt, "loadTokenizer: Usage: loadTokenizer(arg0)");
+ }
+
+ if (!args[0].isString()) {
+ throw jsi::JSError(rt, "loadTokenizer: Expected arg0 to be a string");
+ }
+
+ auto tokenizerPath = args[0].asString(rt).utf8(rt);
+ try {
+ auto tokenizerInstance = std::make_shared(tokenizerPath);
+ return jsi::Object::createFromHostObject(rt, tokenizerInstance);
+ } catch (const std::exception &e) {
+ throw jsi::JSError(rt, std::string("loadTokenizer: ") + e.what());
+ }
+ };
+ auto fn = jsi::Function::createFromHostFunction(rt, jsi::PropNameID::forAscii(rt, name), 1, fnBody);
+
+ module.setProperty(rt, name, fn);
+}
+} // namespace rnexecutorch::extensions::nlp::tokenizer
diff --git a/packages/react-native-executorch/cpp/extensions/nlp/tokenizer.h b/packages/react-native-executorch/cpp/extensions/nlp/tokenizer.h
new file mode 100644
index 0000000000..c85a804bb0
--- /dev/null
+++ b/packages/react-native-executorch/cpp/extensions/nlp/tokenizer.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+namespace rnexecutorch::extensions::nlp::tokenizer {
+// JSI host object that exposes a natively loaded tokenizer to JavaScript.
+class TokenizerHostObject : public facebook::jsi::HostObject,
+                            public std::enable_shared_from_this<TokenizerHostObject> {
+public:
+  // Loads the tokenizer from `tokenizerPath`; throws std::runtime_error on failure.
+  explicit TokenizerHostObject(const std::string &tokenizerPath);
+  facebook::jsi::Value get(facebook::jsi::Runtime &rt, const facebook::jsi::PropNameID &name) override;
+  std::vector<facebook::jsi::PropNameID> getPropertyNames(facebook::jsi::Runtime &rt) override;
+
+private:
+  std::string tokenizerPath_;
+  std::unique_ptr<tokenizers::HFTokenizer> tokenizer_; // reset by the JS-visible `dispose`
+  std::mutex mutex_;
+};
+
+void install_loadTokenizer(facebook::jsi::Runtime &rt, facebook::jsi::Object &module);
+} // namespace rnexecutorch::extensions::nlp::tokenizer
diff --git a/packages/react-native-executorch/react-native-executorch.podspec b/packages/react-native-executorch/react-native-executorch.podspec
index a7b7b43882..5dc748a1ed 100644
--- a/packages/react-native-executorch/react-native-executorch.podspec
+++ b/packages/react-native-executorch/react-native-executorch.podspec
@@ -32,6 +32,10 @@ Pod::Spec.new do |s|
"HEADER_SEARCH_PATHS" => [
"\"$(PODS_TARGET_SRCROOT)/cpp\"",
"\"$(PODS_TARGET_SRCROOT)/third-party/include\"",
+ "\"$(PODS_TARGET_SRCROOT)/third-party/include/executorch/extension/llm/tokenizers/include\"",
+ "\"$(PODS_TARGET_SRCROOT)/third-party/include/executorch/extension/llm/tokenizers/third-party/json/include\"",
+ "\"$(PODS_TARGET_SRCROOT)/third-party/include/executorch/extension/llm/tokenizers/third-party/re2\"",
+ "\"$(PODS_TARGET_SRCROOT)/third-party/include/executorch/extension/llm/tokenizers/third-party/abseil-cpp\"",
].join(' '),
"WARNING_CFLAGS" => "-Wno-documentation"
diff --git a/packages/react-native-executorch/src/extensions/nlp/index.ts b/packages/react-native-executorch/src/extensions/nlp/index.ts
new file mode 100644
index 0000000000..6195f07395
--- /dev/null
+++ b/packages/react-native-executorch/src/extensions/nlp/index.ts
@@ -0,0 +1,2 @@
+export * from './tokenizer';
+export * from './tasks/tokenization';
diff --git a/packages/react-native-executorch/src/extensions/nlp/tasks/tokenization.ts b/packages/react-native-executorch/src/extensions/nlp/tasks/tokenization.ts
new file mode 100644
index 0000000000..297028f320
--- /dev/null
+++ b/packages/react-native-executorch/src/extensions/nlp/tasks/tokenization.ts
@@ -0,0 +1,27 @@
+import type { WorkletRuntime } from 'react-native-worklets';
+
+import { wrapAsync } from '../../../core/runtime';
+import { loadTokenizer } from '../tokenizer';
+
+/**
+ * Loads a tokenizer through `loadTokenizer` and returns its operations plus a
+ * `dispose` handle, in the shape consumed by the `useTokenizer` hook.
+ * @category Typescript API
+ * @param tokenizerPath Absolute local path to a `tokenizer.json` file.
+ * @param runtime Optional worklet runtime forwarded to every `wrapAsync` call.
+ * @returns A promise of `encode`/`decode` wrapped with `wrapAsync`, the direct
+ * `getVocabSize`/`idToToken`/`tokenToId` lookups, and `dispose`.
+ */
+export async function createTokenizer(tokenizerPath: string, runtime?: WorkletRuntime) {
+  const loadAsync = wrapAsync(loadTokenizer, runtime);
+  const native = await loadAsync(tokenizerPath);
+  const { encode, decode, getVocabSize, idToToken, tokenToId } = native;
+  return {
+    encode: wrapAsync(encode, runtime),
+    decode: wrapAsync(decode, runtime),
+    getVocabSize,
+    idToToken,
+    tokenToId,
+    dispose: () => native.dispose(),
+  };
+}
diff --git a/packages/react-native-executorch/src/extensions/nlp/tokenizer.ts b/packages/react-native-executorch/src/extensions/nlp/tokenizer.ts
new file mode 100644
index 0000000000..e4871e0eba
--- /dev/null
+++ b/packages/react-native-executorch/src/extensions/nlp/tokenizer.ts
@@ -0,0 +1,66 @@
+import { rnexecutorchJsi } from '../../native/bridge';
+
+declare const tokenizerBrand: unique symbol;
+
+/**
+ * Native HuggingFace-compatible tokenizer backed by a JSI host object, obtained
+ * from `loadTokenizer`. All methods are synchronous and worklet-compatible.
+ * @category Types
+ */
+export type Tokenizer = {
+ /** Absolute local path of the loaded `tokenizer.json`. */
+ readonly path: string;
+
+ /**
+ * Encodes a string into token ids (special tokens are added according to the
+ * `post_processor` section of the tokenizer.json).
+ * @param text The input text to tokenize.
+ * @returns The encoded token ids.
+ */
+ encode(text: string): number[];
+
+ /**
+ * Decodes token ids back into a string.
+ * @param tokens The token ids to decode.
+ * @param skipSpecialTokens Whether to omit special tokens. Defaults to `true`.
+ * @returns The decoded text.
+ */
+ decode(tokens: number[], skipSpecialTokens?: boolean): string;
+
+ /**
+ * Reports how many entries the vocabulary has.
+ * @returns The size of the tokenizer's vocabulary.
+ */
+ getVocabSize(): number;
+
+ /**
+ * Maps a token id to its token string.
+ * @param id The token id to look up.
+ * @returns The token string for the given id.
+ */
+ idToToken(id: number): string;
+
+ /**
+ * Maps a token string to its id.
+ * @param token The token string to look up.
+ * @returns The id for the given token string.
+ */
+ tokenToId(token: string): number;
+
+ /** Releases the native tokenizer. The instance must not be used afterwards. */
+ dispose(): void;
+
+ /** Prevents plain JS objects from being cast as Tokenizers. @internal */
+ readonly [tokenizerBrand]: never;
+};
+
+/**
+ * Loads a HuggingFace tokenizer from a local `tokenizer.json` via the native JSI binding.
+ * @category Typescript API
+ * @param tokenizerPath Absolute local path to a `tokenizer.json` file.
+ * @returns The loaded tokenizer; the native call throws if loading fails.
+ */
+export function loadTokenizer(tokenizerPath: string): Tokenizer {
+ 'worklet';
+ return rnexecutorchJsi.nlp.loadTokenizer(tokenizerPath) as Tokenizer;
+}
diff --git a/packages/react-native-executorch/src/hooks/useTokenizer.ts b/packages/react-native-executorch/src/hooks/useTokenizer.ts
new file mode 100644
index 0000000000..433ea7ac66
--- /dev/null
+++ b/packages/react-native-executorch/src/hooks/useTokenizer.ts
@@ -0,0 +1,37 @@
+import { useModel } from './useModel';
+import { useResourceDownload } from './useResourceDownload';
+import { createTokenizer } from '../extensions/nlp/tasks/tokenization';
+
+/**
+ * React hook that resolves a `tokenizer.json` source and loads it as a tokenizer.
+ *
+ * The source is handed to `useResourceDownload`; the resulting local path is
+ * given to `useModel` together with `createTokenizer`. Until a tokenizer is
+ * available, `isReady` is `false` and the tokenization functions are
+ * `undefined`. The `dispose` handle of `createTokenizer` is not returned.
+ * @category Hooks
+ * @param tokenizerPath A remote URL or local path to a `tokenizer.json` file.
+ * @param options Hook options.
+ * @param options.preventLoad If true, prevents downloading and loading the
+ * tokenizer.
+ * @returns `isReady`, `error` (the download error when truthy, otherwise the
+ * load error), `downloadProgress`, `localPath`, and the `encode`, `decode`,
+ * `getVocabSize`, `idToToken` and `tokenToId` functions.
+ */
+export function useTokenizer(tokenizerPath: string, options?: { preventLoad?: boolean }) {
+  const skipLoad = options?.preventLoad;
+  const { localPath, downloadProgress, downloadError } = useResourceDownload(tokenizerPath, skipLoad);
+  const { model: tokenizer, error: loadError } = useModel(createTokenizer, localPath ?? null, [localPath]);
+
+  return {
+    isReady: Boolean(tokenizer),
+    error: downloadError || loadError,
+    downloadProgress,
+    localPath,
+    encode: tokenizer?.encode,
+    decode: tokenizer?.decode,
+    getVocabSize: tokenizer?.getVocabSize,
+    idToToken: tokenizer?.idToToken,
+    tokenToId: tokenizer?.tokenToId,
+  };
+}
diff --git a/packages/react-native-executorch/src/index.ts b/packages/react-native-executorch/src/index.ts
index 8f709b8433..563c5b22c0 100644
--- a/packages/react-native-executorch/src/index.ts
+++ b/packages/react-native-executorch/src/index.ts
@@ -1,5 +1,6 @@
// Hooks — primary API for app developers
export * from './hooks/useClassifier';
+export * from './hooks/useTokenizer';
export * from './hooks/useResourceDownload';
export * from './hooks/useModel';
@@ -9,6 +10,7 @@ export * as constants from './constants';
// Task APIs — for developers needing manual lifetime/disposal control
export * from './extensions/cv/tasks/classification';
+export * from './extensions/nlp/tasks/tokenization';
// Core primitives — for library builders and power users
export { tensor } from './core/tensor';
@@ -31,6 +33,7 @@ export { defaultWorkletRuntime, wrapAsync } from './core/runtime';
export * as math from './extensions/math';
export * as cv from './extensions/cv';
+export * as nlp from './extensions/nlp';
// Utils
export * from './utils';
diff --git a/packages/react-native-executorch/src/models.ts b/packages/react-native-executorch/src/models.ts
index 66a88e937b..dfb0b177e1 100644
--- a/packages/react-native-executorch/src/models.ts
+++ b/packages/react-native-executorch/src/models.ts
@@ -27,6 +27,11 @@ const EFFICIENTNET_V2_S_COREML_FP16: ClassifierModel = {
classifierOpts: EFFICIENTNET_V2_S_OPTS,
};
+// =============================================================================
+// Tokenizers
+// =============================================================================
+const ALL_MINILM_L6_V2_TOKENIZER = `${BASE_URL}-all-MiniLM-L6-v2/${VERSION_TAG}/tokenizer.json`;
+
/**
* Registry of pre-configured ExecuTorch models.
*
@@ -44,4 +49,7 @@ export const models = {
COREML_FP16: EFFICIENTNET_V2_S_COREML_FP16,
},
},
+ tokenizer: {
+ ALL_MINILM_L6_V2: ALL_MINILM_L6_V2_TOKENIZER,
+ },
};
diff --git a/packages/react-native-executorch/third-party/README.md b/packages/react-native-executorch/third-party/README.md
index cf3f2e0bb2..b05f958921 100644
--- a/packages/react-native-executorch/third-party/README.md
+++ b/packages/react-native-executorch/third-party/README.md
@@ -5,7 +5,10 @@ Native ExecuTorch binaries and headers are **not** committed to this branch.
The core package's `android/CMakeLists.txt` and `react-native-executorch.podspec`
expect ExecuTorch artifacts under this directory:
-- `include/` — ExecuTorch + c10 + torch headers
+- `include/` — ExecuTorch + c10 + torch headers, including the `pytorch/tokenizers`
+ headers under `include/executorch/extension/llm/tokenizers/include` (used by the
+ nlp/tokenizer extension; the `tokenizers::HFTokenizer` symbols resolve from
+ `libexecutorch`, which is built with the llm/tokenizers extension)
- `android/jniLibs/<abi>/libexecutorch.so`, `android/libs/executorch.jar`
- `ios/Frameworks/ExecutorchLib.xcframework`
diff --git a/yarn.lock b/yarn.lock
index f53aea2319..bc05759538 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -11892,6 +11892,36 @@ __metadata:
languageName: node
linkType: hard
+"nlp@workspace:apps/nlp":
+ version: 0.0.0-use.local
+ resolution: "nlp@workspace:apps/nlp"
+ dependencies:
+ "@babel/core": "npm:^7.29.0"
+ "@react-native/metro-config": "npm:^0.86.0"
+ "@react-navigation/drawer": "npm:^7.9.4"
+ "@react-navigation/native": "npm:^7.2.2"
+ "@types/react": "npm:~19.2.0"
+ babel-preset-expo: "npm:~56.0.14"
+ expo: "npm:~56.0.9"
+ expo-build-properties: "npm:~56.0.17"
+ expo-constants: "npm:~56.0.17"
+ expo-linking: "npm:~56.0.13"
+ expo-router: "npm:~56.2.9"
+ react: "npm:19.2.3"
+ react-native: "npm:0.85.3"
+ react-native-drawer-layout: "npm:^4.2.2"
+ react-native-executorch: "workspace:*"
+ react-native-gesture-handler: "npm:~2.31.1"
+ react-native-reanimated: "npm:4.4.0"
+ react-native-safe-area-context: "npm:~5.7.0"
+ react-native-screens: "npm:~4.25.2"
+ react-native-svg: "npm:15.15.4"
+ react-native-svg-transformer: "npm:^1.5.3"
+ react-native-worklets: "npm:0.9.1"
+ react-refresh: "npm:^0.14.0"
+ languageName: unknown
+ linkType: soft
+
"no-case@npm:^3.0.4":
version: 3.0.4
resolution: "no-case@npm:3.0.4"