diff --git a/V1.7.1/README.md b/V1.7.1/README.md new file mode 100644 index 0000000..d039ba8 --- /dev/null +++ b/V1.7.1/README.md @@ -0,0 +1,82 @@ +# V1.7 Speech Agent + +A voice-driven AI assistant that listens for spoken queries, performs local actions, optionally captures images for visual requests, and answers using Ollama chat models. + +## Features + +- Voice input with Whisper speech recognition +- Text-to-speech output using `edge-tts` +- Intelligent command handling with Ollama chat models +- App launching on macOS for `open ` voice commands +- Web search query simplification and results retrieval +- Vision-context detection and camera image capture for visual queries +- ANSI-enhanced console output with `rich` + +## Requirements + +- macOS +- Python 3.11+ (recommended) +- Microphone access +- Camera access for image-based queries +- Installed Python packages: + - `ollama` + - `speech_recognition` + - `edge_tts` + - `ddgs` + - `opencv-python` + - `rich` + +## Installation + +1. Clone or copy this folder: + +```bash +cd /path/to/AI/OSS_speech_model/Phase-1/V1.7 +``` + +2. Create and activate a Python virtual environment: + +```bash +python3 -m venv venv +source venv/bin/activate +``` + +3. Install required packages: + +```bash +pip install ollama speech_recognition edge_tts ddgs opencv-python rich +``` + +4. Make sure your environment has access to Ollama. + +## Usage + +Run the assistant with: + +```bash +python main.py +``` + +The assistant will prompt: + +- Choose a voice: `Male` or `Female` +- Speak a query after the prompt +- It will attempt to answer, launch apps, or capture an image if visual context is needed +- When finished, it asks whether you want to continue + +## Notes + +- The current implementation uses `afplay` to play audio on macOS. +- App launching relies on `mdfind` and the local macOS application metadata. +- If the assistant cannot understand audio, it prompts again. +- Visual queries are captured and saved temporarily as `instant_photo.png` in the current folder. + +## Troubleshooting + +- If speech recognition fails, verify microphone permissions and `speech_recognition` installation. +- If camera capture fails, verify camera permissions and that `opencv-python` can access the device. +- If audio playback fails, ensure `afplay` is available on macOS. + +## License + +This project does not include a license. Add one if needed. diff --git a/V1.7.1/main.py b/V1.7.1/main.py new file mode 100644 index 0000000..be64e76 --- /dev/null +++ b/V1.7.1/main.py @@ -0,0 +1,200 @@ +# All features in V1.7 plus improved AI models. +import ollama +import speech_recognition as sr +import edge_tts +import asyncio +import tempfile +from ollama import chat +import ddgs +import os +import cv2 +import re +import shutil +import time +from rich.console import Console +from rich.text import Text + +console = Console() +terminal_width = os.get_terminal_size().columns +columns, _ = shutil.get_terminal_size() + +voice_selector = input("Choose a voice (Male/Female): ").lower().strip() +if voice_selector == "male": + VOICE = "en-US-SteffanNeural" +else: + VOICE = "en-US-AvaNeural" + +ANSI_BOLD = "\033[1m" +ANSI_ITALIC = "\033[3m" +ANSI_RESET = "\033[0m" + +def format_for_terminal(text): + if text is None: + return "" + text = re.sub(r'\*\*(.*?)\*\*', f"{ANSI_BOLD}\\1{ANSI_RESET}", text) + text = re.sub(r'\*(.*?)\*', f"{ANSI_ITALIC}\\1{ANSI_RESET}", text) + return text +def clean_for_speech(text): + if text is None: + return "" + return re.sub(r'\*\*|\*', '', text) + +async def speak_async(text): + clean_text = clean_for_speech(text) + with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp: + temp_path = fp.name + communicate = edge_tts.Communicate(clean_text, VOICE) + await communicate.save(temp_path) + os.system(f'afplay "{temp_path}"') + os.remove(temp_path) +def speak(text): + asyncio.run(speak_async(text)) + +answer = [] +is_app_open = 0 +is_recognised = 0 +r = sr.Recognizer() + +while True: + is_app_open = 0 + is_recognised = 0 + + with sr.Microphone() as source: + r.adjust_for_ambient_noise(source, duration=0.2) + Ask_anything = "*Ask Me Anything...*" + speak(Ask_anything) + console.print(Text.from_ansi(format_for_terminal(Ask_anything))) + audio = r.listen(source) + try: + text = r.recognize_whisper(audio, model="small.en") + console.print(" ", style="cornsilk1 on gray19", justify="left") + console.print(Text.from_ansi(format_for_terminal(text)), style="cornsilk1 on gray19", justify="left") + console.print(" ", style="cornsilk1 on gray19", justify="left") + except sr.UnknownValueError: + console.print(Text.from_ansi(format_for_terminal("Could not understand audio"))) + text = "" + + if "open" in text.strip(".").strip(",").strip("!").strip("?").lower(): + parts = text.split(maxsplit=1) + if len(parts) > 1: + target = parts[1].strip() + console.print(Text.from_ansi(format_for_terminal(f"**Opening {target}**"))) + app = os.popen( + f'mdfind \'kMDItemKind == "Application"\' | grep -i "{target}" | head -n 1' + ).read().strip() + if app: + os.system(f'open "{app}"') + else: + os.system(f'open "https://{target.replace(" ", "")}.com"') + is_app_open += 1 + + if text == "": + console.print(Text.from_ansi(format_for_terminal("No input detected. Please try again."))) + pass + else: + if is_app_open == 0: + web_simplifier = chat( + model='ministral-3:3b-cloud', + messages=[ + {'role': 'user','content': text}, + {'role': 'system','content': f'''You are a web-search query simplifier.Your job:Convert the user's message into ONE concise web search query.User message:"{text}"Rules:- Keep only important keywords- Remove filler words and stop words- No emojis- No explanations- Keep the meaning accurate- Make it optimized for a search engine- Output ONLY the final search queryFormat:latest details on as of 2026'''} + ] + ) + trigger = chat( + model='ministral-3:14b-cloud', + messages=[ + {'role': 'user','content': text}, + {'role': 'system','content': f'''You are a vision-context detector.Determine whether answering the user's query requires:- a camera image- surroundings analysis- appearance analysis- object inspection- environmental contextUser query:"{text}"Respond ONLY with:yesornoSay YES only if:- the user refers to themselves- the user refers to their surroundings- the user asks about appearance- the user asks to inspect something visible- the answer requires visual contextExamples of YES:- "How do I look?"- "What's in front of me?"- "Analyze my room"- "What is this object?"- "Does my hair look good?"Examples of NO:- news- coding- facts- explanations- web searches- math- history- general questionsBe accurate.Do not guess.Output ONLY yes or no.No punctuation.No emojis.'''} + ] + ) + if "yes" in trigger.message.content.lower(): + is_recognised = 1 + console.print(Text.from_ansi(format_for_terminal("**Capturing photo...**"))) + cam = cv2.VideoCapture(0) + ret, frame = cam.read() + if ret: + photo_path = os.path.join(os.getcwd(), "instant_photo.png") + cv2.imwrite(photo_path, frame) + console.print(Text.from_ansi(format_for_terminal("**Photo captured successfully!**"))) + else: + console.print(Text.from_ansi(format_for_terminal("**Error: Could not access camera.**"))) + photo_path = None + cam.release() + if photo_path: + image_analysis = ollama.chat( + model='ministral-3:8b-cloud', + messages=[ + {'role': 'system','content': text}, + {'role': 'user','content': f'''Analyze the image based on the user's request.User request:"{text}"Instructions:- Focus mainly on the requested subject- If no subject is specified, analyze the surroundings- Be concise but useful- Be truthful and accurate- Mention important visible details- Do not hallucinate- No emojis- No unnecessary formatting- Keep the response natural and clear''', + 'images': [photo_path] + } + ] + ) + img_out = image_analysis.message.content + console.print(Text.from_ansi(format_for_terminal(img_out))) + answer.append(img_out) + speak(img_out) + else: + pass + + if is_recognised == 1 and is_app_open == 1: + pass + else: + check = web_simplifier.message.content + results = ddgs.DDGS().text( + check, + max_results=3 + ) + response = chat( + model='gemma4:31b-cloud', + messages=[ + {'role': 'user','content': text }, + {'role': 'system','content': f'''You are Churo.Personality:- helpful- professional- intelligent- concise- accurate- natural soundingRules:- Use simple language- Keep responses concise- No emojis- Do not ramble- Answer directly- Be conversational but efficientMemory context:{answer}, use this when you feel that the query lacks context. Current user query:{text}Available web search results:{results}Use the web results ONLY if the user explicitly asks for:- latest news- recent updates- current information- newest details- web searchesOtherwise answer normally without relying on web results.If image analysis was already provided,do not repeat the analysis.Simply continue the conversation naturally.Never say:"Analysis provided"Instead continue naturally and intelligently.'''} + ] + ) + output = response.message.content + formatted_output = format_for_terminal(output) + speech_output = clean_for_speech(output) + aligned = formatted_output.rjust(terminal_width) + console.print(" ") + console.print(" ", style="cornsilk1 on gray15", justify="left") + console.print(Text.from_ansi(aligned), style="cornsilk1 on gray15", justify="left") + console.print(" ", style="cornsilk1 on gray15", justify="left") + console.print() + answer.append(output) + speak(speech_output) + + voice_recogniser = sr.Recognizer() + with sr.Microphone() as source1: + r.adjust_for_ambient_noise(source1, duration=0.2) + prompt_text = "*Do you want to continue the conversation? Yes or No?*" + console.print(Text.from_ansi(format_for_terminal(prompt_text))) + speak("Do you want to continue the conversation? Yes or No?") + time.sleep(0.01) + console.print(Text.from_ansi(format_for_terminal("*Listening for your response...*"))) + audio1 = voice_recogniser.listen(source1) + try: + voice_continue_recogniser = voice_recogniser.recognize_whisper(audio1, model="small.en") + voice_continue_recogniser = voice_continue_recogniser.strip() + console.print(" ", style="cornsilk1 on gray19", justify="left") + console.print(Text.from_ansi(format_for_terminal(voice_continue_recogniser)), style="cornsilk1 on gray19", justify="left") + console.print(" ", style="cornsilk1 on gray19", justify="left") + except sr.UnknownValueError: + console.print(Text.from_ansi(format_for_terminal("Could not understand audio"))) + print() + + norm = re.sub(r'[^\w]', '', voice_continue_recogniser).lower() + positive_patterns = { + "yes","yeah","yep","absolutely","sure","continue","ok","okay","y","yea" + } + if norm == "": + console.print(Text.from_ansi(format_for_terminal("**No input detected.**"))) + pass + elif norm not in positive_patterns: + speak("Bye! Please Visit Again!") + break + else: + continue + + else: + continue \ No newline at end of file diff --git a/V1.8/README.md b/V1.8/README.md new file mode 100644 index 0000000..e20d9f9 --- /dev/null +++ b/V1.8/README.md @@ -0,0 +1,139 @@ +# V1.8 Speech Agent + +V1.8 is an experimental voice-first AI assistant. It listens to spoken prompts, responds out loud, can launch apps on macOS, can search the web for current context, can inspect images from a webcam, and can generate images when the user asks for a visual result. + +## What It Does + +This project combines several assistant behaviors into one loop: + +- Speech-to-text using Whisper through `speech_recognition` +- Text-to-speech using `edge-tts` +- App launching on macOS for commands such as `open Safari` +- Web query simplification and search retrieval through DDGS +- Webcam-based vision analysis for appearance or environment questions +- Image generation with Stable Diffusion +- Terminal-friendly output formatting with `rich` + +## Who This Is For + +This repository is intended for developers and hobbyists who want to explore a local voice assistant workflow. It is especially useful if you are interested in: + +- voice interfaces +- multimodal AI interactions +- local automation on macOS +- image generation pipelines +- combining web search, vision, and speech in a single assistant + +## Requirements + +- macOS +- Python 3.11 or newer is recommended +- A microphone with system permission enabled +- A camera with system permission enabled if you want vision features +- Ollama installed and available on the machine running the script +- `chafa` installed if you want terminal previews for generated images +- Hardware that can run the configured Stable Diffusion pipeline on `mps`, or code changes to target a different device + +## Python Dependencies + +The script uses the following Python packages: + +- `torch` +- `ollama` +- `speech_recognition` +- `edge_tts` +- `ddgs` +- `opencv-python` +- `rich` +- `diffusers` +- `term-image` + +## Installation + +1. Clone the repository and open the `V1.8` folder. + +2. Create a virtual environment: + +```bash +python3 -m venv venv +source venv/bin/activate +``` + +3. Install the dependencies: + +```bash +pip install torch ollama SpeechRecognition edge-tts ddgs opencv-python rich diffusers term-image +``` + +4. Make sure Ollama can access the models referenced in `main.py`. + +## Usage + +Run the assistant with: + +```bash +python main.py +``` + +On startup, the program asks you to choose a voice: + +- `Male` selects `en-US-SteffanNeural` +- Any other input selects `en-US-AvaNeural` + +Then the assistant will: + +1. Prompt you to speak +2. Transcribe your speech +3. Decide whether the request is for app launching, image generation, vision analysis, or a normal answer +4. Speak the response back to you +5. Ask whether you want to continue the conversation + +## How It Works + +### App Launching + +If the transcription includes `open`, the assistant tries to find a matching application on macOS. If no local app is found, it falls back to opening a website based on the target name. + +### Web Answers + +For general questions, the assistant first simplifies the query and fetches recent search results. The response model can use those results when the request is about news, current events, or recent information. + +### Vision Mode + +If the prompt seems to require visual context, the assistant captures a frame from the webcam, saves it locally, and sends it to a vision-capable model for analysis. + +### Image Generation + +If the prompt is recognized as an image request, the assistant converts it into a short image prompt, generates an image with Stable Diffusion, saves the result as `generated_image.png`, and displays it in the terminal. + +## Limitations + +- The current implementation is macOS-focused. +- The assistant depends on several external models and services. +- The Stable Diffusion pipeline is loaded at startup, which may be slow on lower-powered machines. +- The current code stores generated and captured images in the working directory. +- The app-launching behavior is intentionally simple and may not match every app name perfectly. + +## Future Opportunities + +This version leaves room for several improvements: + +- Add cross-platform support beyond macOS +- Make the model names and device selection configurable through environment variables or a config file +- Add a proper command parser for app launching instead of relying on keyword matching +- Add a conversation history file or database +- Add streaming responses so users hear partial answers sooner +- Add a richer UI for desktop or web use +- Add safer image handling and cleanup for generated files +- Add a setup script or dependency file for easier installation + +## Troubleshooting + +- If microphone input fails, check system permissions and verify `speech_recognition` is installed correctly. +- If camera capture fails, check camera permissions and confirm OpenCV can access the device. +- If image generation fails, verify that your hardware supports the configured device target or update the pipeline configuration. +- If terminal image preview fails, install `chafa` and confirm it is available in your PATH. + +## License + +No license has been added yet. Add one before publishing or distributing the project widely. diff --git a/V1.8/main.py b/V1.8/main.py new file mode 100644 index 0000000..693bc99 --- /dev/null +++ b/V1.8/main.py @@ -0,0 +1,237 @@ +# All features in V1.7.1 plus image generation capablities. +import torch +import ollama +import speech_recognition as sr +import edge_tts +import asyncio +import tempfile +from ollama import chat +import ddgs +import os +import cv2 +import re +import shutil +import time +import subprocess +from rich.console import Console +from rich.text import Text +from diffusers import StableDiffusionPipeline +from term_image.image import from_file + +pipe = StableDiffusionPipeline.from_pretrained("nota-ai/bk-sdm-small", torch_dtype=torch.float16) +pipe = pipe.to("mps") + +console = Console() +terminal_width = os.get_terminal_size().columns +columns, _ = shutil.get_terminal_size() + +voice_selector = input("Choose a voice (Male/Female): ").lower().strip() +if voice_selector == "male": + VOICE = "en-US-SteffanNeural" +else: + VOICE = "en-US-AvaNeural" + +ANSI_BOLD = "\033[1m" +ANSI_ITALIC = "\033[3m" +ANSI_RESET = "\033[0m" + +def format_for_terminal(text): + if text is None: + return "" + text = re.sub(r'\*\*(.*?)\*\*', f"{ANSI_BOLD}\\1{ANSI_RESET}", text) + text = re.sub(r'\*(.*?)\*', f"{ANSI_ITALIC}\\1{ANSI_RESET}", text) + return text +def clean_for_speech(text): + if text is None: + return "" + return re.sub(r'\*\*|\*', '', text) + +async def speak_async(text): + clean_text = clean_for_speech(text) + with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp: + temp_path = fp.name + communicate = edge_tts.Communicate(clean_text, VOICE) + await communicate.save(temp_path) + os.system(f'afplay "{temp_path}"') + os.remove(temp_path) +def speak(text): + asyncio.run(speak_async(text)) + +answer = [] +is_app_open = 0 +is_recognised = 0 +r = sr.Recognizer() + +while True: + is_app_open = 0 + is_recognised = 0 + + with sr.Microphone() as source: + r.adjust_for_ambient_noise(source, duration=0.2) + Ask_anything = "*Ask Me Anything...*" + speak(Ask_anything) + console.print(Text.from_ansi(format_for_terminal(Ask_anything))) + audio = r.listen(source) + try: + text = r.recognize_whisper(audio, model="small.en") + console.print(" ", style="cornsilk1 on gray19", justify="left") + console.print(Text.from_ansi(format_for_terminal(text)), style="cornsilk1 on gray19", justify="left") + console.print(" ", style="cornsilk1 on gray19", justify="left") + except sr.UnknownValueError: + console.print(Text.from_ansi(format_for_terminal("Could not understand audio"))) + text = "" + + if "open" in text.strip(".").strip(",").strip("!").strip("?").lower(): + parts = text.split(maxsplit=1) + if len(parts) > 1: + target = parts[1].strip() + console.print(Text.from_ansi(format_for_terminal(f"**Opening {target}**"))) + app = os.popen( + f'mdfind \'kMDItemKind == "Application"\' | grep -i "{target}" | head -n 1' + ).read().strip() + if app: + os.system(f'open "{app}"') + else: + os.system(f'open "https://{target.replace(" ", "")}.com"') + is_app_open += 1 + + if text == "": + console.print(Text.from_ansi(format_for_terminal("No input detected. Please try again."))) + pass + else: + if is_app_open == 0: + web_simplifier = chat( + model='ministral-3:3b-cloud', + messages=[ + {'role': 'user','content': text}, + {'role': 'system','content': f'''You are a web-search query simplifier.Your job:Convert the user's message into ONE concise web search query.User message:"{text}"Rules:- Keep only important keywords- Remove filler words and stop words- No emojis- No explanations- Keep the meaning accurate- Make it optimized for a search engine- Output ONLY the final search queryFormat:latest details on as of 2026'''} + ] + ) + + image_generation_trigger = chat( + model='ministral-3:14b-cloud', + messages=[ + {'role': 'user','content': text}, + {'role': 'system','content': f'''You are an image generation trigger detector.Determine whether the user's query requires generating an image or not.User query:"{text}"Respond ONLY with:yesornoSay YES only if:- the user explicitly asks for an image- the user requests a visual representation of something- the answer requires generating an imageExamples of YES:- "Generate an image of a sunset over mountains"- "Create a picture of a futuristic city skyline"- "I want to see a visual representation of a dragon"- "Can you make an illustration of a robot?"Examples of NO:- news- coding- facts- explanations- web searches- math- history- general questionsBe accurate.Do not guess.Output ONLY yes or no.No punctuation.No emojis.'''} + ] + ) + + if "yes" in image_generation_trigger.message.content.lower(): + image_generation_filter = chat( + model='ministral-3:3b-cloud', + messages=[ + {'role': 'user','content': text}, + {'role': 'system','content': f"""You are an expert prompt to image prompt generator. Today your goal is to convert "{text}" into a proper prompt for an image model. This is an example you can follow:- User:"Can you generate an image of a sunset over mountains?" this is what you have to do: "Generate a realistic image of a sunset over mountains". You are maximum only allowed to use 10 words, anything higher will not be tollerated."""} + ] + ) + + prompt = image_generation_filter.message.content + image = pipe(prompt, num_inference_steps=20).images[0] + image.save("generated_image.png") + small_width = 60 + subprocess.run([ + "chafa", + "/Users/lakshyaprajapati/Documents/Repositories/generated_image.png", + "--symbols", "block", + f"--size={small_width}" + ]) + + else: + + vision_trigger = chat( + model='ministral-3:14b-cloud', + messages=[ + {'role': 'user','content': text}, + {'role': 'system','content': f'''You are a vision-context detector.Determine whether answering the user's query requires:- a camera image- surroundings analysis- appearance analysis- object inspection- environmental contextUser query:"{text}"Respond ONLY with:yesornoSay YES only if:- the user refers to themselves- the user refers to their surroundings- the user asks about appearance- the user asks to inspect something visible- the answer requires visual contextExamples of YES:- "How do I look?"- "What's in front of me?"- "Analyze my room"- "What is this object?"- "Does my hair look good?"Examples of NO:- news- coding- facts- explanations- web searches- math- history- general questionsBe accurate.Do not guess.Output ONLY yes or no.No punctuation.No emojis.'''} + ] + ) + if "yes" in vision_trigger.message.content.lower(): + is_recognised = 1 + console.print(Text.from_ansi(format_for_terminal("**Capturing photo...**"))) + cam = cv2.VideoCapture(0) + ret, frame = cam.read() + if ret: + photo_path = os.path.join(os.getcwd(), "instant_photo.png") + cv2.imwrite(photo_path, frame) + console.print(Text.from_ansi(format_for_terminal("**Photo captured successfully!**"))) + else: + console.print(Text.from_ansi(format_for_terminal("**Error: Could not access camera.**"))) + photo_path = None + cam.release() + if photo_path: + image_analysis = ollama.chat( + model='ministral-3:8b-cloud', + messages=[ + {'role': 'system','content': text}, + {'role': 'user','content': f'''Analyze the image based on the user's request.User request:"{text}"Instructions:- Focus mainly on the requested subject- If no subject is specified, analyze the surroundings- Be concise but useful- Be truthful and accurate- Mention important visible details- Do not hallucinate- No emojis- No unnecessary formatting- Keep the response natural and clear''', + 'images': [photo_path] + } + ] + ) + img_out = image_analysis.message.content + console.print(Text.from_ansi(format_for_terminal(img_out))) + answer.append(img_out) + speak(img_out) + else: + pass + + if is_recognised == 1 and is_app_open == 1: + pass + else: + check = web_simplifier.message.content + results = ddgs.DDGS().text( + check, + max_results=3 + ) + + response = chat( + model='gemma4:31b-cloud', + messages=[ + {'role': 'user','content': text }, + {'role': 'system','content': f'''You are Churo.Personality:- helpful- professional- intelligent- concise- accurate- natural soundingRules:- Use simple language- Keep responses concise- No emojis- Do not ramble- Answer directly- Be conversational but efficientMemory context:{answer}, use this when you feel that the query lacks context. Current user query:{text}Available web search results:{results}Use the web results ONLY if the user explicitly asks for:- latest news- recent updates- current information- newest details- web searchesOtherwise answer normally without relying on web results.If image analysis was already provided,do not repeat the analysis.Simply continue the conversation naturally.Never say:"Analysis provided"Instead continue naturally and intelligently.'''} + ] + ) + output = response.message.content + formatted_output = format_for_terminal(output) + speech_output = clean_for_speech(output) + aligned = formatted_output.rjust(terminal_width) + console.print(" ") + console.print(" ", style="cornsilk1 on gray15", justify="left") + console.print(Text.from_ansi(aligned), style="cornsilk1 on gray15", justify="left") + console.print(" ", style="cornsilk1 on gray15", justify="left") + console.print() + answer.append(output) + speak(speech_output) + + voice_recogniser = sr.Recognizer() + with sr.Microphone() as source1: + r.adjust_for_ambient_noise(source1, duration=0.2) + prompt_text = "*Do you want to continue the conversation? Yes or No?*" + console.print(Text.from_ansi(format_for_terminal(prompt_text))) + speak("Do you want to continue the conversation? Yes or No?") + time.sleep(0.01) + console.print(Text.from_ansi(format_for_terminal("*Listening for your response...*"))) + audio1 = voice_recogniser.listen(source1) + try: + voice_continue_recogniser = voice_recogniser.recognize_whisper(audio1, model="small.en") + voice_continue_recogniser = voice_continue_recogniser.strip() + console.print(" ", style="cornsilk1 on gray19", justify="left") + console.print(Text.from_ansi(format_for_terminal(voice_continue_recogniser)), style="cornsilk1 on gray19", justify="left") + console.print(" ", style="cornsilk1 on gray19", justify="left") + except sr.UnknownValueError: + console.print(Text.from_ansi(format_for_terminal("Could not understand audio"))) + print() + + norm = re.sub(r'[^\w]', '', voice_continue_recogniser).lower() + positive_patterns = positive_patterns = {"y", "Y", "yes", "Yes", "YES", "yep", "Yep", "yeah", "Yeah", "yup", "Yup", "sure", "Sure", "ok", "OK", "okay", "Okay", "affirmative", "Affirmative", "certainly", "Certainly", "definitely", "Definitely", "absolutely", "Absolutely", "indeed", "Indeed", "true", "True", "continue"} + if norm == "": + console.print(Text.from_ansi(format_for_terminal("**No input detected.**"))) + pass + elif norm not in positive_patterns: + speak("Bye! Please Visit Again!") + break + else: + continue + + else: + continue \ No newline at end of file diff --git a/V1.8/test2.py b/V1.8/test2.py new file mode 100644 index 0000000..cfdcdda --- /dev/null +++ b/V1.8/test2.py @@ -0,0 +1,12 @@ +import subprocess + +# 1. Define a smaller target width (try values between 30 and 60) +small_width = 60 + +# 2. Run chafa with the smaller size constraint +subprocess.run([ + "chafa", + "/Users/lakshyaprajapati/Documents/Repositories/generated_image.png", + "--symbols", "block", + f"--size={small_width}" # Shrinks the overall size +])