diff --git a/apps/nlp/app.json b/apps/nlp/app.json new file mode 100644 index 0000000000..929d222914 --- /dev/null +++ b/apps/nlp/app.json @@ -0,0 +1,45 @@ +{ + "expo": { + "name": "nlp", + "slug": "nlp", + "version": "1.0.0", + "orientation": "portrait", + "icon": "./assets/icons/icon.png", + "userInterfaceStyle": "light", + "newArchEnabled": true, + "scheme": "rne-nlp", + "splash": { + "image": "./assets/icons/splash.png", + "resizeMode": "contain", + "backgroundColor": "#ffffff" + }, + "ios": { + "supportsTablet": true, + "bundleIdentifier": "com.anonymous.nlp" + }, + "android": { + "adaptiveIcon": { + "foregroundImage": "./assets/icons/adaptive-icon.png", + "backgroundColor": "#ffffff" + }, + "package": "com.anonymous.nlp" + }, + "web": { + "favicon": "./assets/icons/favicon.png" + }, + "plugins": [ + "expo-router", + [ + "expo-build-properties", + { + "android": { + "minSdkVersion": 26 + }, + "ios": { + "deploymentTarget": "17.0" + } + } + ] + ] + } +} diff --git a/apps/nlp/app/_layout.tsx b/apps/nlp/app/_layout.tsx new file mode 100644 index 0000000000..bdcfc39660 --- /dev/null +++ b/apps/nlp/app/_layout.tsx @@ -0,0 +1,32 @@ +import { Drawer } from 'expo-router/drawer'; +import { ColorPalette } from '../theme'; +import React from 'react'; + +/** Root layout component built on the Drawer import from expo-router. NOTE(review): the JSX elements are not visible in this view; only the options title Main Menu and a hidden drawer item style remain, presumably for the index screen - confirm against the real file. */ export default function Layout() { + return ( + + null, + title: 'Main Menu', + drawerItemStyle: { display: 'none' }, + }} + /> + + + ); +} diff --git a/apps/nlp/app/index.tsx b/apps/nlp/app/index.tsx new file mode 100644 index 0000000000..98ff59ab97 --- /dev/null +++ b/apps/nlp/app/index.tsx @@ -0,0 +1,51 @@ +import { useRouter } from 'expo-router'; +import { View, Text, StyleSheet, TouchableOpacity } from 'react-native'; +import { ColorPalette } from '../theme'; +import ExecutorchLogo from '../assets/icons/executorch.svg'; + +/** Landing screen: shows the prompt Select a demo and a Tokenizer entry wired to a callback that calls router.navigate with the tokenizer route. */ export default function Home() { + const router = useRouter(); + + return ( + + + Select a demo + + router.navigate('tokenizer/')}> + Tokenizer + + + + ); +} + +/** Styles for the home screen: a centered white container, header text, and rounded buttons in the strongPrimary color. */ const styles = StyleSheet.create({ + 
container: { + flex: 1, + justifyContent: 'center', + alignItems: 'center', + backgroundColor: '#fff', + }, + headerText: { + fontSize: 18, + color: ColorPalette.strongPrimary, + margin: 20, + }, + buttonContainer: { + width: '80%', + justifyContent: 'space-evenly', + marginBottom: 20, + }, + button: { + backgroundColor: ColorPalette.strongPrimary, + borderRadius: 8, + padding: 14, + alignItems: 'center', + marginBottom: 12, + }, + buttonText: { + color: 'white', + fontSize: 16, + fontWeight: '600', + }, +}); diff --git a/apps/nlp/app/tokenizer/index.tsx b/apps/nlp/app/tokenizer/index.tsx new file mode 100644 index 0000000000..dbaf0ed3d0 --- /dev/null +++ b/apps/nlp/app/tokenizer/index.tsx @@ -0,0 +1,272 @@ +import React, { useEffect, useRef, useState } from 'react'; +import { View, Text, TextInput, ScrollView, StyleSheet } from 'react-native'; +import { useTokenizer, models } from 'react-native-executorch'; +import ScreenWrapper from '../../components/ScreenWrapper'; +import { ModelStatus } from '../../components/ModelStatus'; +import { Button } from '../../components/Button'; +import { theme } from '../../theme'; + +/** Result of one self-check: display label, human-readable detail, and pass flag. */ type Check = { label: string; detail: string; pass: boolean }; + +/** Tokenizer demo component: obtains the tokenizer functions from useTokenizer for ALL_MINILM_L6_V2 and keeps the input text, run status, and check results in local state. */ function TokenizerContent() { + const { isReady, downloadProgress, error, encode, decode, getVocabSize, idToToken, tokenToId } = + useTokenizer(models.tokenizer.ALL_MINILM_L6_V2); + + /* NOTE(review): no type arguments are visible on these useState calls in this view; with null or [] initial values the inferred state types would be too narrow - confirm against the real file. */ const [text, setText] = useState('Hello world'); + const [running, setRunning] = useState(false); + const [runError, setRunError] = useState(null); + const [ids, setIds] = useState(null); + const [roundTrip, setRoundTrip] = useState(null); + const [vocabSize, setVocabSize] = useState(null); + const [checks, setChecks] = useState([]); + + /* Truthy only when isReady is set and all five tokenizer functions are defined. */ const ready = isReady && encode && decode && getVocabSize && idToToken && tokenToId; + + /** Runs the self-checks: bails out unless ready, clears earlier results, then encodes, decodes, and evaluates the checks inside try, catch, finally. */ const run = async () => { + if (!ready) return; + setRunning(true); + setRunError(null); + setIds(null); + setRoundTrip(null); + setVocabSize(null); + setChecks([]); + try { + const 
tokenIds = await encode(text); + /* NOTE(review): the meaning of the second decode argument (true) is not shown here; presumably it skips special tokens - confirm. */ const decoded = await decode(tokenIds, true); + const vocab = getVocabSize(); + + // Self-consistent inverse check on a token from the actual output + // (HFTokenizer adds special tokens per the tokenizer.json post_processor). + /* Picks index 1 when there are at least two ids, otherwise index 0. NOTE(review): for an empty array the index would be -1 and the non-null assertion would mask an undefined id. */ const sampleId = tokenIds[Math.min(1, tokenIds.length - 1)]!; + const sampleToken = idToToken(sampleId); + const sampleIdBack = tokenToId(sampleToken); + + /* NOTE(review): the round-trip detail prints text.toLowerCase() untrimmed while pass compares trimmed strings, so the display can differ from what is compared. */ const nextChecks: Check[] = [ + { + label: 'Round-trip decode(encode(text))', + detail: `"${decoded}" vs "${text.toLowerCase()}"`, + // all-MiniLM-L6-v2 is an uncased BERT WordPiece tokenizer + pass: decoded.trim() === text.trim().toLowerCase(), + }, + { + label: 'Vocabulary size', + detail: `${vocab} (expected 30522 for bert-base-uncased)`, + pass: vocab === 30522, + }, + { + label: 'Inverse tokenToId(idToToken(id))', + detail: `${sampleId} → "${sampleToken}" → ${sampleIdBack}`, + pass: sampleIdBack === sampleId, + }, + ]; + + setIds(tokenIds); + setRoundTrip(decoded); + setVocabSize(vocab); + setChecks(nextChecks); + + // Structured log so the result is verifiable from device/Metro logs. + console.log( + '[TokenizerTest]', + JSON.stringify({ + allPass: nextChecks.every((c) => c.pass), + input: text, + ids: tokenIds, + decoded, + vocab, + checks: nextChecks.map((c) => ({ label: c.label, pass: c.pass, detail: c.detail })), + }) + ); + } catch (e: any) { + /* Logs and stores the message when present, else the stringified value. NOTE(review): e is typed any; unknown with an instanceof Error check would be stricter. */ console.log('[TokenizerTest] ERROR', e?.message ?? String(e)); + setRunError(e?.message ?? String(e)); + } finally { + setRunning(false); + } + }; + + // Auto-run once as soon as the tokenizer is ready, so the demo doubles as a + // self-checking smoke test (results logged under "[TokenizerTest]"). 
+ /* Guard ref: set to true before the first run so the effect calls run only once for this component instance. */ const autoRan = useRef(false); + useEffect(() => { + if (ready && !autoRan.current) { + autoRan.current = true; + run(); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [ready]); + + return ( + + + Tokenizer + + Loads the all-MiniLM-L6-v2 tokenizer and proves encode / decode / getVocabSize / idToToken + / tokenToId work end-to-end against the native HFTokenizer. + + + + + + + +