diff --git a/.env.example b/.env.example index 1e10ade..defee3c 100644 --- a/.env.example +++ b/.env.example @@ -20,24 +20,45 @@ BASE_URL=http://localhost:8000 # Admin (set to true to allow impersonation in development) ALLOW_HTTP_SESSIONS=true +# Slack — app configuration tokens (used by scripts/provision_slack_bots.py) +# One-time setup: https://api.slack.com/apps → "Your App Configuration Tokens" → Generate Token +# After generation paste both values here; the provisioning script rotates them automatically. +SLACK_CONFIG_TOKEN=xoxe-your-app-config-token +SLACK_CONFIG_REFRESH_TOKEN=xoxe-your-app-config-refresh-token + # Slack — one pair per agent (Bot User OAuth Token + App-Level Token) +# Add as many agents as needed using this pattern; no code changes required. +# Run scripts/provision_slack_bots.py to create and install all missing bots automatically. +# SLACK_BOT_TOKEN_<AGENT_ID>=xoxb-... (required) +# SLACK_APP_TOKEN_<AGENT_ID>=xapp-... (optional) SLACK_BOT_TOKEN_SU=xoxb-placeholder -SLACK_APP_TOKEN_SU=xapp-placeholder SLACK_BOT_TOKEN_WISEMAN=xoxb-placeholder -SLACK_APP_TOKEN_WISEMAN=xapp-placeholder -SLACK_BOT_TOKEN_LOTZ=xoxb-placeholder -SLACK_APP_TOKEN_LOTZ=xapp-placeholder -SLACK_BOT_TOKEN_CRAVATT=xoxb-placeholder -SLACK_APP_TOKEN_CRAVATT=xapp-placeholder -SLACK_BOT_TOKEN_GROTJAHN=xoxb-placeholder -SLACK_APP_TOKEN_GROTJAHN=xapp-placeholder -SLACK_BOT_TOKEN_PETRASCHECK=xoxb-placeholder -SLACK_APP_TOKEN_PETRASCHECK=xapp-placeholder -SLACK_BOT_TOKEN_KEN=xoxb-placeholder -SLACK_APP_TOKEN_KEN=xapp-placeholder -SLACK_BOT_TOKEN_RACKI=xoxb-placeholder -SLACK_APP_TOKEN_RACKI=xapp-placeholder -SLACK_BOT_TOKEN_SAEZ=xoxb-placeholder -SLACK_APP_TOKEN_SAEZ=xapp-placeholder -SLACK_BOT_TOKEN_WU=xoxb-placeholder -SLACK_APP_TOKEN_WU=xapp-placeholder +SLACK_BOT_TOKEN_GRANTBOT=xoxb-placeholder + +# Podcast TTS backend: "mistral" (default), "openai", or "local" (vLLM-Omni server) +PODCAST_TTS_BACKEND="mistral" + +# Mistral AI TTS (used when PODCAST_TTS_BACKEND=mistral) 
+MISTRAL_API_KEY=your-mistral-api-key +MISTRAL_TTS_MODEL=voxtral-mini-tts-latest +MISTRAL_TTS_DEFAULT_VOICE=your-voice-uuid + +# OpenAI TTS (used when PODCAST_TTS_BACKEND=openai) +# Voices: alloy echo fable onyx nova shimmer +# Models: tts-1 tts-1-hd gpt-4o-mini-tts +OPENAI_API_KEY=your-openai-api-key +OPENAI_TTS_MODEL=tts-1 +OPENAI_TTS_DEFAULT_VOICE=alloy + +# Local vLLM-Omni TTS server (used when PODCAST_TTS_BACKEND=local) +# Start with: vllm serve <model> --port 8008 (the port must match LOCAL_TTS_PORT) +LOCAL_TTS_HOST=127.0.0.1 +LOCAL_TTS_PORT=8008 +LOCAL_TTS_MODEL=mistralai/Voxtral-4B-TTS-2603 +LOCAL_TTS_VOICE=default + +# Podcast +PODCAST_BASE_URL=http://localhost:8001 +PODCAST_SEARCH_WINDOW_DAYS=14 +PODCAST_MAX_CANDIDATES=50 +# PODCAST_NORMALIZE_AUDIO=true # uncomment to enable ffmpeg loudnorm post-processing (EBU R128, -16 LUFS) diff --git a/.gitignore b/.gitignore index aad82ec..342842f 100644 --- a/.gitignore +++ b/.gitignore @@ -53,3 +53,9 @@ certbot/ .pytest_cache/ .coverage htmlcov/ + +# Runtime data (state files, generated audio — ephemeral) +data/ + +# Test output artifacts +.labbot-tests/ diff --git a/AGENT.md b/AGENT.md index a94b338..39628fc 100644 --- a/AGENT.md +++ b/AGENT.md @@ -32,6 +32,7 @@ All specs are in `/specs/`: - `profile-ingestion.md` — 9-step pipeline, ORCID → PubMed → PMC → LLM - `admin-dashboard.md` — read-only, server-rendered, impersonation - `agent-system.md` — Slack Bolt, Socket Mode, two-phase LLM calls, simulation engine +- `labbot-podcast.md` — daily personalized research briefing: PubMed search, LLM selection/summarization, Local or API TTS, Slack DM delivery, per-PI RSS podcast feed ## Tech Stack diff --git a/CLAUDE.md b/CLAUDE.md index 66a844b..4c8db5a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,5 +1,29 @@ # CLAUDE.md +## Project Overview + +**coPI** is an AI-powered research collaboration discovery platform for academic PIs. 
It combines: + +- **Web app** (`src/routers/`, `templates/`) — FastAPI + Jinja2, ORCID OAuth login, profile editing, admin dashboard +- **Profile pipeline** (`src/services/`) — Ingests ORCID/PubMed data; Claude Opus synthesizes a public + private profile per researcher +- **Agent simulation** (`src/agent/`) — 12 AI Slack bots (one per pilot lab) that converse, identify synergies, and generate collaboration proposals in a turn-based 5-phase loop +- **Podcast pipeline** (`src/podcast/`) — Daily personalized research briefings via Slack DM + RSS feed with TTS audio +- **GrantBot** (`src/agent/grantbot.py`) — Fetches NIH/NSF FOAs, posts relevant ones to Slack channels +- **Background worker** (`src/worker/`) — PostgreSQL-backed job queue for profile generation and monthly refreshes + +**Stack:** Python/FastAPI, PostgreSQL + SQLAlchemy async, Anthropic Claude (Opus for profiles, Sonnet for agents), Slack Web API, Docker Compose, AWS (S3/SES). + +**Key patterns:** +- Public profiles exported to `profiles/public/` (disk markdown, agent-readable) +- Private profiles in `profiles/private/` (PI behavioral instructions, editable via web/DM) +- Agent working memory in `profiles/memory/` (updated post-simulation) +- All LLM calls logged to `LlmCallLog` table (model, tokens, latency, cost) +- Agent messages append-only in `MessageLog`; outcomes in `ThreadDecision`; PI ratings in `ProposalReview` +- Prompts are standalone files in `prompts/` — editable without code changes +- Specs for all subsystems in `specs/` + +**Pilot agents:** SuBot, WisemanBot, LotzBot, CravattBot, GrotjahnBot, PetrascheckBot, KenBot, RackiBot, SaezBot, WuBot, WardBot, BrineyBot + ## Testing Run `python -m pytest tests/ -v` before committing. All tests must pass. @@ -42,3 +66,50 @@ docker compose --profile agent run -d --name agent-run agent python -m src.agent ``` **Note:** The agent-run container uses mounted source code but the Python process only loads modules at startup. 
Code changes require a container restart to take effect. **After any code change that affects the running agent process, flag this to the user so they can decide whether to restart.** + +## Podcast Pipeline + +The LabBot Podcast pipeline (specs/labbot-podcast.md) runs daily at 9am UTC for each active agent: + +1. Build PubMed queries from lab's public profile +2. Fetch candidates from PubMed + bioRxiv + medRxiv + arXiv (last 14 days, up to 50+10 candidates) +3. Claude Sonnet selects most relevant paper (applying PI's podcast preferences from their private ProfileRevision) +4. Claude Opus writes a ~250-word structured brief +5. TTS audio generated (Mistral, OpenAI, or local vLLM-Omni, per PODCAST_TTS_BACKEND); ffmpeg loudnorm applied if PODCAST_NORMALIZE_AUDIO=true +6. Slack DM sent to PI with text summary + RSS link +7. RSS feed available at `/podcast/{agent_id}/feed.xml` +8. Audio served at `/podcast/{agent_id}/audio/{date}.mp3` + +Preprint IDs use prefixed format: `biorxiv:...`, `medrxiv:...`, `arxiv:...`. The `paper_url` in summaries links to the correct server (not always PubMed). + +```bash +# Run podcast pipeline once for all active agents +docker compose --profile podcast run --rm podcast python -m src.podcast.main + +# Test pipeline for 'su' agent only +docker compose exec app python scripts/test_podcast_su.py +``` + +## Database Migration Caveat + +If the DB was initialized from the `main` branch schema and then this branch is checked out, `alembic upgrade head` will stamp the version without re-running migrations that share a revision ID with ones already applied on `main`. Any columns added by branch-specific migrations may be silently missing. + +**Symptom:** `UndefinedColumnError` at runtime despite `alembic current` showing `head`. 
+ +**Fix:** Check for missing columns and apply them manually: +```bash +docker compose exec app python -c " +import asyncio +from src.database import get_engine +from sqlalchemy import text + +async def check(): + eng = get_engine() + async with eng.connect() as conn: + result = await conn.execute(text(\"SELECT column_name FROM information_schema.columns WHERE table_name='researcher_profiles' ORDER BY ordinal_position\")) + print([r[0] for r in result]) + +asyncio.run(check()) +" +``` +Then add any missing columns with `ALTER TABLE ... ADD COLUMN IF NOT EXISTS ...`. diff --git a/Cohort_approaches.txt b/Cohort_approaches.txt new file mode 100644 index 0000000..02ca1c2 --- /dev/null +++ b/Cohort_approaches.txt @@ -0,0 +1,102 @@ +Cohort System — Approaches Considered +====================================== + +Approach A: Cohort as an Interaction Filter (Minimal) +------------------------------------------------------ +Add a cohort_memberships table (agent_id, cohort_id). The engine stays structurally +identical — one global turn loop, shared agent state. Before any interaction is +permitted (Phase 3 activation, Phase 4 reply, Phase 5 tag), check: do these two +agents share at least one cohort? If not, the post is invisible to them. + +Pros: +- Tiny diff, backward compatible +- Agents in multiple cohorts still have unified state + +Cons: +- Turn rules (thread limits, proposal caps, budgets) remain global per agent — + cannot be scoped by cohort +- Original form: no concurrency — cohorts still compete in a single sequential loop + +Revision (adopted): The concurrency gap is filled independently of cohorts using a +global semaphore (N concurrent turns) + min-heap agent selection, keeping the cohort +system as a pure interaction filter with no role in scheduling. See Chosen Direction. + + +Approach B: Per-Cohort Agent State (Partitioned) +------------------------------------------------- +AgentState becomes dict[cohort_id, AgentState]. 
The main loop iterates cohorts in +round-robin (or concurrently via asyncio.gather), each cohort running its own turn +selection over its member agents. Thread limits, proposal counts, and budgets are +tracked per (agent_id, cohort_id) — an agent in two cohorts has independent budgets +in each. Interaction gating is automatic: Phase 4/5 only operate within a cohort's +member set. + +Pros: +- True per-cohort parallelism +- Rules naturally scoped +- Clean mental model + +Cons: +- Meaningful refactor — AgentState, budget tracking, blocking logic all need the + cohort dimension +- Agent working memory (profiles/memory/) would need cohort tagging or remain + shared across cohorts + + +Approach C: Cohort-Sharded Engine Instances (Full Isolation) +------------------------------------------------------------- +Instantiate one SimulationEngine per cohort, each with only its member agents +loaded. Run them as separate asyncio tasks (or even separate processes). Agents in +multiple cohorts appear in multiple engines with independent state copies. + +Pros: +- Complete isolation +- Maximum parallelism +- No cross-contamination of state + +Cons: +- Agents in overlapping cohorts post from the same Slack bot token simultaneously — + requires serialization or per-cohort bot accounts +- State diverges: memory written by cohort-A engine does not feed cohort-B engine +- Most operationally complex of the three options + + +Chosen Direction +---------------- +Approach A (interaction filter) + global semaphore concurrency + min-heap selection. +Cohorts have no role in scheduling — they only gate whether an agent acts on another +agent's activity. 
Key decisions and findings: + +Requirements that shaped the design: +- Limits are shared across cohorts (no state partitioning needed — rules Approach B) +- Posts remain visible to all; cohort gates *acting*, not *seeing* +- Cohort memberships are dynamic (admin-driven, can change mid-run) +- Goal is purely practical: skip unnecessary LLM calls, not thematic isolation + +Why per-cohort async dispatch was rejected: +- Cohort count is unbounded — N async tasks scales with cohorts, not with agents +- Agents in many cohorts get selected proportionally more often (cohort-count bias) +- Replaced by a fixed global semaphore (concurrent_turns, default = active_thread_threshold) + whose width is independent of cohort topology + +Turn selection — min-heap over weighted random: +- Weighted random gives probabilistic fairness but can starve agents at large list sizes, + especially when phase5_skip_probability > 0 (fast no-op turns let agents re-enter + immediately) +- Min-heap keyed by last_selected guarantees the longest-waiting eligible agent always + gets the next slot; O(log n) selection vs O(n) +- concurrent_turns defaults to active_thread_threshold (both = 3) so the two levers + stay in proportion as the thread threshold is tuned + +turn_delay_seconds — repurposed from global pause to per-agent cooldown: +- Investigation finding: in simulation.py:360-361, turn_delay_seconds is an asyncio.sleep + applied AFTER every productive turn, blocking the entire loop (no Slack polling, no other + agents). A global dead-weight pause — correct semantics for rate-limiting a single + sequential loop, wrong for any concurrent model. +- New behavior: enforced as a per-agent eligibility check inside heap construction — + an agent is excluded from selection until (now - last_selected) >= turn_delay_seconds. + Other agents are unaffected. The global sleep is removed. 
+- The existing _last_llm_caller guard (prevents same agent back-to-back calls) is + superseded by the min-heap + cooldown and removed from the concurrent path. + +See specs/cohort-system.md for the full implementation plan. diff --git a/Copi-future-devel.md b/Copi-future-devel.md new file mode 100644 index 0000000..1501991 --- /dev/null +++ b/Copi-future-devel.md @@ -0,0 +1,277 @@ +# coPI Future Development — Idea Board + +An unstructured, living collection of future features, infrastructure directions, and scaling ideas. No timelines or priority order implied. + +--- + +## Distributed Agent Architecture + +**Agents and bots as first-class autonomous services.** Each lab agent (SuBot, WisemanBot, etc.) should eventually run as its own independent Docker container — or deployable microservice — rather than being orchestrated by a single monolithic simulation engine. Each agent container would own: + +- Its own LLM call lifecycle (rate limits, retries, model selection, budget tracking) +- Its own identity (Slack credentials, agent ID, display name) +- Its own profile layer (public, private, working memory) synced to and from the central DB +- Its own podcast pipeline and grant discovery preferences +- Its own health endpoint and observability + +These distributed agents integrate with the centralized **copi.science** platform via a lightweight API contract: +- Register with the platform on startup (agent ID, PI association, capability flags) +- Publish events (new message, proposal created, working memory updated) to a shared message bus or webhook endpoint +- Pull configuration and profiles from the central API rather than shared filesystem +- Associate with PI user accounts via the existing ORCID auth / user model + +**Benefits:** +- Independent deployment and restart without affecting other agents +- Per-agent resource tuning (some labs need more LLM budget, longer context, different models) +- Natural path to community-contributed or self-hosted agents (a lab 
outside the pilot could run their own container and join the coPI network) +- Fault isolation — one broken agent token doesn't stop the whole simulation + +**Near-term stepping stone:** Split the current `SimulationEngine` into a thin coordinator that dispatches turns to per-agent worker processes (could be Python subprocesses or async tasks before full container split). The database logging and Slack polling infrastructure already supports this. + +--- + +## Platform & Infrastructure + +### Message Bus / Event Stream +Replace polling-based turn coordination with a lightweight event bus (Redis Streams, NATS, or AWS EventBridge). Agents subscribe to relevant channels rather than the engine polling on their behalf. Enables real-time reaction without idle backoff hacks. + +### Database +- Migrate Postgres to AWS RDS (Multi-AZ, automated backups, point-in-time recovery) +- Consider read replicas for admin dashboard queries that don't need to block writes +- Add full-text search over agent messages and proposals (pg_trgm or Elasticsearch) to make the corpus of generated science discoverable + +### Job Queue +Swap the simple Postgres jobs table for AWS SQS or a proper task queue (Celery + Redis, or Temporal). Better visibility into job failures, retries, and dead-letter handling. + +### Object Storage +Move profile markdown files and podcast audio off the local filesystem into S3: +- Profiles served directly from S3 URLs (CDN-friendly, no filesystem sync needed across containers) +- Podcast audio served from S3/CloudFront instead of the app server +- Enables stateless app containers (no mounted volumes) + +### Container Orchestration +Migrate from Docker Compose on a single EC2 to ECS Fargate (or EKS): +- Each agent as its own Fargate task definition +- App, worker, grantbot, podcast each independently scalable +- Task-level IAM roles instead of shared env vars + +### Secrets Management +Replace `.env` files with AWS Secrets Manager or Vault. 
Each agent container gets its own secret scope (its Slack token, its LLM API key quota). + +--- + +## Platform Independence: Beyond Slack + +The current simulation engine is tightly coupled to Slack's Web API — `conversations_history`, `chat.postMessage` with `thread_ts`, `conversations_create`, and DMs for PI notifications. This creates a hard dependency on a proprietary SaaS platform with rate limits, pricing tiers, workspace policies, and no self-hosting path. Long-term, the conversation substrate should be replaceable. + +### The Core Requirement + +Whatever platform replaces or supplements Slack must support: +- **Channel history polling** — fetch messages since a timestamp, paginated +- **Threaded replies** — post a reply scoped to a specific parent message +- **Channel creation via API** — agents create collaboration channels dynamically +- **Bot identities** — multiple bot accounts, each with their own token/identity +- **Rate limits configurable or disableable** on a self-hosted instance + +### Recommended Platforms (researched) + +**Mattermost** (Tier 1 — drop-in replacement). API is near-identical to Slack's. `conversations_history` → `posts.get_posts_for_channel(since=ts)`, `chat.postMessage` with `thread_ts` → `posts.create_post(root_id=...)`. The `python-mattermost-driver` library is comprehensive and actively maintained. Docker Compose deployment is the simplest of any option. Rate limits can be disabled entirely on a self-hosted instance via `config.json`. One gotcha: `root_id` must point to the thread root, not a child post (same invariant as Slack's `thread_ts`). AGPLv3 core license. Lowest migration cost from current codebase. + +**Zulip** (Tier 1 — cleanest architecture). The official `zulip` Python SDK is the best-maintained of any platform reviewed. Threading model is topic-based rather than message-based: each "thread" is a named topic inside a stream (e.g., topic `collab-su-cravatt-proteomics` in stream `general`). 
This is a conceptual remap but actually cleaner for coPI — topic names are human-readable, searchable, and map naturally to collaboration channel names. `RATE_LIMITING = False` in `settings.py` is a one-line bypass. Five-service Docker stack (app + PostgreSQL + Memcached + RabbitMQ + Redis) is more complex than Mattermost but well-documented. AGPLv3. + +**Matrix / Synapse** (Tier 2 — best for multi-institution federation). If the vision extends to agents at different institutions (Scripps, UCSF, Stanford) each running their own homeserver, Matrix is the only protocol designed for this. The **Application Services API** lets a single process manage a namespace of virtual bot users (`@subot:scripps.copi.science`, `@wisemanbot:scripps.copi.science`) without N separate auth sessions — the right architecture for hundreds of agents. Native `m.thread` relation type (stable since 2022) supports threaded replies. `matrix-nio` Python SDK is solid. More operational complexity than Mattermost/Zulip. Apache 2.0 license. + +**Not recommended:** Rocket.Chat (MongoDB stack, messier licensing, weaker Python tooling), Stoat/Revolt (Discord-style reply references don't model thread groups, no official Python SDK), Discourse (forum anti-spam defenses actively fight high-frequency bot posting, non-standard Docker launcher), Flarum (PHP, no official Python client, no official Docker image), Lemmy/ActivityPub directly (microblogging semantics, inadequate rate limits for agent throughput). + +### Abstraction Layer + +The right architectural response is a thin `ConversationBackend` interface in the simulation engine: +``` +post_message(channel, text, thread_id=None) +get_channel_history(channel, since=None) -> [Message] +create_channel(name) -> channel_id +open_dm(user_ids) -> channel_id +``` +Concrete implementations: `SlackBackend`, `MattermostBackend`, `ZulipBackend`, `MatrixBackend`. The `SimulationEngine` depends only on the interface. 
This makes platform swaps testable and enables running the same simulation against multiple backends in parallel (e.g., Slack for the live pilot, Mattermost for CI/dev). + +### Near-Term Step + +Stand up a Mattermost instance alongside the existing Slack workspace. Port `AgentSlackClient` to `AgentMattermostClient` using the translation table below. Run a shadow simulation against Mattermost to validate parity before cutting over. + +| Slack operation | Mattermost API v4 | +|---|---| +| `conversations_history(channel, oldest)` | `GET /channels/{id}/posts?since={ms}` | +| `conversations_replies(channel, ts)` | `GET /posts/{id}/thread` | +| `chat.postMessage(channel, text)` | `POST /posts {channel_id, message}` | +| `chat.postMessage(channel, text, thread_ts)` | `POST /posts {root_id, message}` | +| `conversations_create(name)` | `POST /channels {team_id, name, type="O"}` | +| `conversations_list()` | `GET /teams/{id}/channels` | +| `conversations_open(users)` | `POST /channels/direct [uid1, uid2]` | + +--- + +## Agent Intelligence & Behavior + +### Extended Tool Use +Give agents access to more tools during their turns: +- `search_literature(query)` — semantic search over bioRxiv/PubMed (beyond the static abstract retrieval) +- `retrieve_code_repo(github_url)` — read a lab's public software to understand methods concretely +- `query_knowledge_graph()` — tap Andrew Su's BioThings/Translator infrastructure for drug-gene-disease links +- `calculate_overlap(agent_a, agent_b)` — structured synergy score from the matchmaker engine + +### Persistent Cross-Run Memory +Current working memory is a single markdown file rewritten after each run. 
Future direction: a structured memory graph (embedding store + entity records) that: +- Tracks the state of every relationship between labs (explored, dormant, active, concluded) +- Retains key facts learned about other labs across months of runs +- Enables long-term arc tracking ("WisemanBot and CravattBot have been circling a covalent proteostasis project for 3 months") + +### Agent-to-Agent DMs +Currently prohibited to keep conversations in observable channels. A future opt-in "private negotiation" mode could allow bilateral DMs for sensitive pre-proposal discussions, with PI notification. + +### Multi-Institutional Agents +Extend the pilot beyond Scripps. An agent from an external institution could join the Slack workspace (or a federated equivalent) and participate using the same protocol. The distributed container architecture above makes this natural. + +### Human / PI-Driven Proposal Inception + +Currently proposals always originate from agent-to-agent conversation. PIs should be able to seed the process directly: + +**Chat-initiated proposals.** A PI DMs their bot with a rough idea ("I want to explore a collab with someone working on cryo-EM and proteostasis — see who makes sense") and the agent treats this as a standing directive: it builds a research brief around the idea, searches other agents' public profiles for the best matches, and opens a targeted conversation rather than waiting for organic emergence. + +**PI Wish List.** A structured, PI-maintained list of collaboration interests stored alongside the private profile — not freeform text, but a lightweight list of entries: research question, preferred skills/methods, urgency, open/closed status. The agent checks this list during its Phase 5 (new post) turn and can proactively draft an opening post or reach out to the top-matching agent. The wish list is editable via the web UI and via DM commands ("add to my wish list: looking for a structural biology collaborator for GPCR ligand validation"). 
+ +**Agent acts as scout, not author.** The PI's idea shapes the direction; the agent's job is to find the best possible match from the available labs, surface the evidence for that match (overlapping publications, complementary methods), and bring the most promising candidate into a real conversation. The PI retains approval over which threads get opened. + +**Closed-loop feedback.** When a wish-list item matures into a formal proposal or is explicitly dismissed by the PI, the item is marked resolved. Stale open items can trigger a periodic "still interested?" DM to the PI. + +### Proposal Auto-Drafting +After a collaboration thread matures, an agent could invoke a structured drafting pipeline that produces a formatted two-page specific aims document, exported to PDF and emailed to both PIs for review. + +### Richer Confidence Signals +Agents currently self-label proposals as High / Moderate / Speculative. Future: a second-pass evaluator (a separate LLM call or fine-tuned classifier) that independently scores proposal quality and flags ones that violated the collaboration quality standards. + +--- + +## Profile Pipeline + +### Continuous Refresh +Instead of monthly batch refresh, watch for new publications in near-real-time (PubMed RSS, bioRxiv API polling) and trigger incremental profile updates within hours of a paper appearing. + +### Richer Ingestion Sources +- Preprint servers (bioRxiv, medRxiv) as first-class citation sources, not just podcast candidates +- Lab websites (scrape protocols.io, lab pages, GitHub) for methods and reagent lists +- Grant databases (NIH Reporter) to surface active funding and project aims +- Faculty CV / biosketch (PDF upload and parse) + +### Semantic Embedding Index +Index all public profiles as embeddings for fast nearest-neighbor matchmaking across large numbers of labs (beyond the 12-pilot pairwise comparison). 
+ +--- + +## Web Platform + +### Open Registration +Allow any PI to self-register via ORCID, trigger their own profile generation, and optionally spin up an agent. Move from invite-only pilot to open beta. + +### Agent Marketplace / Directory +A public directory of all registered lab agents, their research domains, and active collaboration interests. Searchable and filterable. Acts as a network graph visualization. + +### PI Dashboard Evolution +- Timeline view of all agent activity (messages, proposals, funding threads) +- Side-by-side comparison of two agents' profiles with synergy scoring +- Export proposal history to PDF or grant writing tool format + +### Notifications & Integrations +- Email digest of week's collaboration activity +- Slack DM to PI when a proposal reaches draft stage (currently implemented) — extend to webhook / email fallback +- Calendar integration to suggest meeting times when both PIs are interested in a proposal + +### Roles & Teams +- Department or institute-level admin roles (a department chair can see all labs in their unit) +- Team accounts (lab manager, postdoc delegate) with fine-grained permissions beyond the current binary PI/admin model + +--- + +## Podcast & Content Pipeline + +### Speaker Attribution TTS +Use voice cloning or voice assignment so each lab's podcast episode has a consistent "voice identity" for the host. Differentiates the experience across labs. + +### Multi-paper Briefs +Current pipeline picks one paper per day. A "weekly digest" mode that covers 3–5 papers with comparative framing ("three papers this week all point toward..."). + +### Community Podcast Feed +An aggregated RSS feed across all labs, curated by the platform, surfacing cross-lab thematic clusters ("this week in proteostasis, three labs published on..."). 
+ +### PI-Narrated Episodes +Allow PIs to record a short audio reaction to an episode (via mobile app or Slack voice message) that gets appended to the RSS episode, making the podcast interactive. + +--- + +## Observability & Ops + +### Cost Attribution +Tag every LLM call with agent ID, pipeline stage, and user account. Build a cost dashboard so PIs (and the platform operator) can see per-agent LLM spend over time. + +### Simulation Replay +Record enough state to replay a simulation run deterministically (message log + agent states at each turn). Enables debugging, demo mode, and A/B testing prompt changes against historical runs. + +### A/B Prompt Testing +Formalize a mechanism for running two versions of a prompt file simultaneously across different agents or simulation runs. Track quality metrics (proposal rate, PI approval rate, collaboration confidence distribution) to guide prompt iteration. + +### Alerting +- CloudWatch alarm on worker job failure rate +- Slack ping to admin channel when an agent's error rate exceeds threshold +- Budget alert when LLM spend crosses a weekly ceiling per agent + +--- + +## Provider and Platform Flexibility + +### LLM Provider Abstraction + +Agents are currently hard-wired to the Anthropic API (Opus for replies, Sonnet for scan/filter). A provider abstraction layer would let individual agents — or individual pipeline stages — use different models or vendors: + +- **Per-agent model selection.** A computationally heavier agent (e.g., one with a larger publication corpus) might use a faster/cheaper model for Phase 2 scanning while still using a high-quality model for Phase 4 replies. Another agent at a partner institution might have access to a different provider entirely. +- **Supported providers to abstract over:** Anthropic (current), OpenAI (GPT-4o, o3), Google (Gemini 2.x), Mistral, local/self-hosted models via vLLM or Ollama (important for institutions with data-sovereignty requirements or GPU clusters). 
+- **Implementation pattern:** A `LLMClient` interface with `complete(messages, tools=None, model=None)` — same interface used today in `src/services/llm.py` — backed by provider-specific implementations. The `LlmCallLog` table already captures model name and cost, so cost attribution across providers is already scaffolded. +- **Budget routing.** Route expensive calls (long context, tool-use loops) to cheaper providers when quality thresholds allow. Route trust-sensitive calls (private profile rewrites, PI DMs) to a designated "primary" provider the institution controls. + +### Social Media & Public Communication Channels + +Beyond closed-network agent-to-agent communication, agents could have a presence on public academic social platforms — either as a read channel (monitoring relevant conversations) or a publish channel (sharing lab updates, collaboration interests). + +**Bluesky (AT Protocol).** Open protocol, self-hostable Personal Data Servers (PDS), Python SDK (`atproto` on PyPI). API supports posting, reading timelines, and following/mention notifications. The AT Protocol's federated architecture aligns well with the distributed agent model — each institution could host its own PDS for their agents. Rate limits are API-key-bound and configurable on a self-hosted PDS. Agents could post brief research updates, tag other labs, and surface collaboration interests publicly. + +**Mastodon / ActivityPub.** `Mastodon.py` is a well-maintained Python SDK. Each agent gets a Fediverse identity. Posting via `status_post()`, reading via `timeline_hashtag()` or `notifications()`. Hard rate limit: 300 requests per 5 minutes per access token — sufficient for low-frequency public updates but not for the turn-based simulation engine's polling cadence. Best suited as a broadcast channel (agent posts a summary of a new collaboration proposal, links back to copi.science) rather than a simulation substrate. + +**Twitter/X.** REST API v2, Python via `tweepy`. 
Rate limits on the free tier are extremely restrictive (500 posts/month per app); the Basic tier ($100/month) allows more. Viable only as a one-way broadcast (GrantBot posts relevant funding opportunities publicly) rather than agent dialogue. + +**Use cases worth building:** +- **Public lab feed.** Each agent maintains a Bluesky or Mastodon account. When a proposal reaches "High" confidence and the PI approves, the agent posts a one-paragraph summary publicly. Acts as a live research networking signal visible to the broader community. +- **Cross-network discovery.** Agent monitors a set of hashtags or accounts on Bluesky/Mastodon, surfaces interesting posts to the PI via the daily podcast brief or a Slack DM, and can propose a collaboration with an external lab it discovered online. +- **Grant opportunity broadcast.** GrantBot posts relevant FOAs to Bluesky/Mastodon in addition to the internal Slack channel, reaching researchers outside the immediate pilot network. + +### Communication Platform Routing + +As agents acquire multiple possible communication surfaces (internal Slack/Mattermost, public Bluesky, email, web DM), a routing layer determines which surface is appropriate for a given message type: + +| Message type | Internal channel | PI notification | Public broadcast | +|---|---|---|---| +| Agent-to-agent collaboration | Mattermost/Matrix | — | — | +| High-confidence proposal ready | — | Slack/email DM | — (until PI approves) | +| PI-approved proposal summary | — | — | Bluesky/Mastodon | +| Funding opportunity | Internal #funding | Slack DM | Bluesky/Twitter | +| Daily podcast brief | — | Slack DM + RSS | — | + +--- + +## Community & Open Source + +### Agent SDK / Protocol +Publish a minimal open spec for the "coPI agent protocol" — the API contract that any lab bot must implement to join a coPI network. This allows third-party developers to build custom agents (domain-specific, tool-augmented) that integrate with the platform. 
+ +### Self-Hosted Agent Nodes +A PI or institution could run the agent container on their own infrastructure, connecting to the shared coPI.science platform. Their data stays on their servers; only messages and public profile content cross the wire. + +### Plugin System for Tools +Make the agent tool registry extensible so domain-specific tools (cryo-EM database lookup, ChEMBL query, protein structure retrieval) can be added per-agent without touching the core simulation engine. diff --git a/Dockerfile b/Dockerfile index c032e95..63a7b94 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,7 @@ WORKDIR /app RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ libpq-dev \ + ffmpeg \ && rm -rf /var/lib/apt/lists/* # Install Python dependencies diff --git a/alembic/versions/0010_access_gate_and_waitlist.py b/alembic/versions/0010_access_gate_and_waitlist.py index 36c0ec6..79cb165 100644 --- a/alembic/versions/0010_access_gate_and_waitlist.py +++ b/alembic/versions/0010_access_gate_and_waitlist.py @@ -1,7 +1,7 @@ """Access gate + waitlist -Revision ID: 0010 -Revises: 0009 +Revision ID: 0010a +Revises: 0010 Create Date: 2026-04-15 00:00:00.000000 """ @@ -13,8 +13,8 @@ from alembic import op -revision: str = "0010" -down_revision: Union[str, None] = "0009" +revision: str = "0010a" +down_revision: Union[str, None] = "0010" branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None diff --git a/alembic/versions/0010_add_podcast_episodes.py b/alembic/versions/0010_add_podcast_episodes.py new file mode 100644 index 0000000..adad7d2 --- /dev/null +++ b/alembic/versions/0010_add_podcast_episodes.py @@ -0,0 +1,56 @@ +"""Add podcast_episodes table + +Revision ID: 0010 +Revises: 0009 +Create Date: 2026-04-09 00:00:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +from alembic import op + +revision: str = "0010" +down_revision: Union[str, 
None] = "0009" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "podcast_episodes", + sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True), + sa.Column("agent_id", sa.String(50), nullable=False), + sa.Column("episode_date", sa.Date, nullable=False), + sa.Column("pmid", sa.String(100), nullable=False), + sa.Column("paper_title", sa.String(500), nullable=False), + sa.Column("paper_authors", sa.String(500), nullable=False), + sa.Column("paper_journal", sa.String(255), nullable=False), + sa.Column("paper_year", sa.Integer, nullable=False), + sa.Column("text_summary", sa.Text, nullable=False), + sa.Column("audio_file_path", sa.String(500), nullable=True), + sa.Column("audio_duration_seconds", sa.Integer, nullable=True), + sa.Column("slack_delivered", sa.Boolean, nullable=False, server_default="false"), + sa.Column("selection_justification", sa.Text, nullable=False), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.func.now(), + nullable=False, + ), + ) + op.create_index("ix_podcast_episodes_agent_id", "podcast_episodes", ["agent_id"]) + op.create_index("ix_podcast_episodes_episode_date", "podcast_episodes", ["episode_date"]) + op.create_unique_constraint( + "uq_podcast_agent_date", "podcast_episodes", ["agent_id", "episode_date"] + ) + + +def downgrade() -> None: + op.drop_constraint("uq_podcast_agent_date", "podcast_episodes") + op.drop_index("ix_podcast_episodes_episode_date") + op.drop_index("ix_podcast_episodes_agent_id") + op.drop_table("podcast_episodes") diff --git a/alembic/versions/0011_add_podcast_paper_url.py b/alembic/versions/0011_add_podcast_paper_url.py new file mode 100644 index 0000000..5b2aa8f --- /dev/null +++ b/alembic/versions/0011_add_podcast_paper_url.py @@ -0,0 +1,29 @@ +"""Add paper_url column to podcast_episodes + +Revision ID: 0011 +Revises: 0010a +Create Date: 2026-04-10 00:00:00.000000 + +""" + 
+from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +revision: str = "0011" +down_revision: Union[str, None] = "0010a" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "podcast_episodes", + sa.Column("paper_url", sa.String(1000), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("podcast_episodes", "paper_url") diff --git a/alembic/versions/0012_add_podcast_preferences.py b/alembic/versions/0012_add_podcast_preferences.py new file mode 100644 index 0000000..bba69c7 --- /dev/null +++ b/alembic/versions/0012_add_podcast_preferences.py @@ -0,0 +1,64 @@ +"""Add podcast_preferences table + +Revision ID: 0012 +Revises: 0011 +Create Date: 2026-04-14 00:00:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql +from sqlalchemy.dialects.postgresql import ARRAY + +from alembic import op + +revision: str = "0012" +down_revision: Union[str, None] = "0011" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "podcast_preferences", + sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True), + sa.Column("agent_id", sa.String(50), nullable=False), + sa.Column("voice_id", sa.String(100), nullable=True), + sa.Column( + "extra_keywords", + ARRAY(sa.String), + nullable=False, + server_default="{}", + ), + sa.Column( + "preferred_journals", + ARRAY(sa.String), + nullable=False, + server_default="{}", + ), + sa.Column( + "deprioritized_journals", + ARRAY(sa.String), + nullable=False, + server_default="{}", + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.func.now(), + ), + ) + op.create_index( + "ix_podcast_preferences_agent_id", + "podcast_preferences", + ["agent_id"], + unique=True, + ) + + +def 
downgrade() -> None: + op.drop_index("ix_podcast_preferences_agent_id", table_name="podcast_preferences") + op.drop_table("podcast_preferences") diff --git a/alembic/versions/0013_podcast_user_support.py b/alembic/versions/0013_podcast_user_support.py new file mode 100644 index 0000000..89d77cd --- /dev/null +++ b/alembic/versions/0013_podcast_user_support.py @@ -0,0 +1,83 @@ +"""Extend podcast tables to support plain ORCID users (no agent required) + +Adds nullable user_id FK to podcast_preferences and podcast_episodes so that +any user who has completed onboarding can receive daily research briefings +without needing an approved AgentRegistry entry. + +Changes: + - podcast_preferences.agent_id: NOT NULL → nullable + - podcast_preferences.user_id: new nullable FK → users.id, unique index + - podcast_episodes.agent_id: NOT NULL → nullable + - podcast_episodes.user_id: new nullable FK → users.id + - podcast_episodes: partial unique index on (user_id, episode_date) WHERE user_id IS NOT NULL + +Revision ID: 0013 +Revises: 0012 +Create Date: 2026-04-14 00:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import UUID + +from alembic import op + +revision: str = "0013" +down_revision: Union[str, None] = "0012" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # --- podcast_preferences --- + # Make agent_id nullable (existing agent rows keep their values) + op.alter_column("podcast_preferences", "agent_id", nullable=True) + + # Add user_id FK column + op.add_column( + "podcast_preferences", + sa.Column( + "user_id", + UUID(as_uuid=True), + sa.ForeignKey("users.id", ondelete="CASCADE"), + nullable=True, + ), + ) + op.create_index( + "ix_podcast_preferences_user_id", + "podcast_preferences", + ["user_id"], + unique=True, + ) + + # --- podcast_episodes --- + # Make agent_id nullable (existing agent rows keep their 
values) + op.alter_column("podcast_episodes", "agent_id", nullable=True) + + # Add user_id FK column + op.add_column( + "podcast_episodes", + sa.Column( + "user_id", + UUID(as_uuid=True), + sa.ForeignKey("users.id", ondelete="CASCADE"), + nullable=True, + ), + ) + # Partial unique index: one episode per user per day (only when user_id is set) + op.execute( + "CREATE UNIQUE INDEX ix_podcast_episodes_user_date " + "ON podcast_episodes (user_id, episode_date) " + "WHERE user_id IS NOT NULL" + ) + + +def downgrade() -> None: + op.execute("DROP INDEX IF EXISTS ix_podcast_episodes_user_date") + op.drop_column("podcast_episodes", "user_id") + op.alter_column("podcast_episodes", "agent_id", nullable=False) + + op.drop_index("ix_podcast_preferences_user_id", table_name="podcast_preferences") + op.drop_column("podcast_preferences", "user_id") + op.alter_column("podcast_preferences", "agent_id", nullable=False) diff --git a/alembic/versions/0014_add_matchmaker_proposals.py b/alembic/versions/0014_add_matchmaker_proposals.py new file mode 100644 index 0000000..ea31eb4 --- /dev/null +++ b/alembic/versions/0014_add_matchmaker_proposals.py @@ -0,0 +1,57 @@ +"""Add matchmaker_proposals table + +Revision ID: 0014 +Revises: 0013 +Create Date: 2026-04-21 00:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import UUID + +from alembic import op + +revision: str = "0014" +down_revision: Union[str, None] = "0013" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "matchmaker_proposals", + sa.Column("id", UUID(as_uuid=True), primary_key=True), + sa.Column( + "pi_a_id", + UUID(as_uuid=True), + sa.ForeignKey("users.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column( + "pi_b_id", + UUID(as_uuid=True), + sa.ForeignKey("users.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("proposal_md", sa.Text, 
nullable=False), + sa.Column("title", sa.String(500), nullable=False), + sa.Column("confidence", sa.String(20), nullable=False), + sa.Column("llm_model", sa.String(100), nullable=False), + sa.Column("input_tokens", sa.Integer, nullable=True), + sa.Column("output_tokens", sa.Integer, nullable=True), + sa.Column( + "generated_at", + sa.DateTime(timezone=True), + server_default=sa.func.now(), + nullable=False, + ), + ) + op.create_index("ix_matchmaker_proposals_pi_a_id", "matchmaker_proposals", ["pi_a_id"]) + op.create_index("ix_matchmaker_proposals_pi_b_id", "matchmaker_proposals", ["pi_b_id"]) + + +def downgrade() -> None: + op.drop_index("ix_matchmaker_proposals_pi_b_id", table_name="matchmaker_proposals") + op.drop_index("ix_matchmaker_proposals_pi_a_id", table_name="matchmaker_proposals") + op.drop_table("matchmaker_proposals") diff --git a/alembic/versions/0015_matchmaker_nullable_ids_and_names.py b/alembic/versions/0015_matchmaker_nullable_ids_and_names.py new file mode 100644 index 0000000..cf7b95f --- /dev/null +++ b/alembic/versions/0015_matchmaker_nullable_ids_and_names.py @@ -0,0 +1,30 @@ +"""Make matchmaker PI FKs nullable; add pi_a_name / pi_b_name for CLI path + +Revision ID: 0015 +Revises: 0014 +Create Date: 2026-04-22 00:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "0015" +down_revision: Union[str, None] = "0014" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.alter_column("matchmaker_proposals", "pi_a_id", nullable=True) + op.alter_column("matchmaker_proposals", "pi_b_id", nullable=True) + op.add_column("matchmaker_proposals", sa.Column("pi_a_name", sa.String(255), nullable=True)) + op.add_column("matchmaker_proposals", sa.Column("pi_b_name", sa.String(255), nullable=True)) + + +def downgrade() -> None: + op.drop_column("matchmaker_proposals", "pi_b_name") + 
op.drop_column("matchmaker_proposals", "pi_a_name") + op.alter_column("matchmaker_proposals", "pi_b_id", nullable=False) + op.alter_column("matchmaker_proposals", "pi_a_id", nullable=False) diff --git a/alembic/versions/0016_add_pi_proposal_evaluations.py b/alembic/versions/0016_add_pi_proposal_evaluations.py new file mode 100644 index 0000000..930555c --- /dev/null +++ b/alembic/versions/0016_add_pi_proposal_evaluations.py @@ -0,0 +1,102 @@ +"""Add pi_proposal_evaluations table for NIH-style PI proposal scoring + +Revision ID: 0016 +Revises: 0015 +Create Date: 2026-05-04 00:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +revision: str = "0016" +down_revision: Union[str, None] = "0015" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "pi_proposal_evaluations", + sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True), + sa.Column( + "user_id", + postgresql.UUID(as_uuid=True), + sa.ForeignKey("users.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("proposal_type", sa.String(20), nullable=False), + sa.Column( + "thread_decision_id", + postgresql.UUID(as_uuid=True), + sa.ForeignKey("thread_decisions.id", ondelete="CASCADE"), + nullable=True, + ), + sa.Column( + "matchmaker_proposal_id", + postgresql.UUID(as_uuid=True), + sa.ForeignKey("matchmaker_proposals.id", ondelete="CASCADE"), + nullable=True, + ), + sa.Column("score_significance", sa.SmallInteger(), nullable=False), + sa.Column("score_innovation", sa.SmallInteger(), nullable=False), + sa.Column("score_approach", sa.SmallInteger(), nullable=False), + sa.Column("score_investigators", sa.SmallInteger(), nullable=False), + sa.Column("score_environment", sa.SmallInteger(), nullable=False), + sa.Column("score_overall_impact", sa.SmallInteger(), nullable=False), + 
sa.Column("comments_significance", sa.Text(), nullable=True), + sa.Column("comments_innovation", sa.Text(), nullable=True), + sa.Column("comments_approach", sa.Text(), nullable=True), + sa.Column("comments_investigators", sa.Text(), nullable=True), + sa.Column("comments_environment", sa.Text(), nullable=True), + sa.Column("comments_overall", sa.Text(), nullable=False), + sa.Column( + "evaluated_at", + sa.DateTime(timezone=True), + server_default=sa.func.now(), + nullable=False, + ), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True), + ) + + # Indexes + op.create_index("ix_ppe_user_id", "pi_proposal_evaluations", ["user_id"]) + op.create_index("ix_ppe_user_type", "pi_proposal_evaluations", ["user_id", "proposal_type"]) + op.create_index("ix_ppe_thread_decision_id", "pi_proposal_evaluations", ["thread_decision_id"]) + op.create_index( + "ix_ppe_matchmaker_proposal_id", "pi_proposal_evaluations", ["matchmaker_proposal_id"] + ) + + # Unique constraints + op.create_unique_constraint( + "uq_ppe_user_thread", "pi_proposal_evaluations", ["user_id", "thread_decision_id"] + ) + op.create_unique_constraint( + "uq_ppe_user_matchmaker", + "pi_proposal_evaluations", + ["user_id", "matchmaker_proposal_id"], + ) + + # Check constraints + op.create_check_constraint( + "ck_ppe_proposal_type", + "pi_proposal_evaluations", + "proposal_type IN ('agent', 'matchmaker')", + ) + op.create_check_constraint( + "ck_ppe_proposal_present", + "pi_proposal_evaluations", + "thread_decision_id IS NOT NULL OR matchmaker_proposal_id IS NOT NULL", + ) + for col in ["significance", "innovation", "approach", "investigators", "environment", "overall_impact"]: + op.create_check_constraint( + f"ck_ppe_score_{col}", + "pi_proposal_evaluations", + f"score_{col} BETWEEN 1 AND 9", + ) + + +def downgrade() -> None: + op.drop_table("pi_proposal_evaluations") diff --git a/alembic/versions/0017_add_is_paused_to_agents.py b/alembic/versions/0017_add_is_paused_to_agents.py new file mode 100644 
index 0000000..59e1759 --- /dev/null +++ b/alembic/versions/0017_add_is_paused_to_agents.py @@ -0,0 +1,32 @@ +"""Add is_paused column to agents table + +Revision ID: 0017 +Revises: 0016 +Create Date: 2026-05-05 00:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "0017" +down_revision: Union[str, None] = "0016" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "agents", + sa.Column( + "is_paused", + sa.Boolean(), + nullable=False, + server_default=sa.text("false"), + ), + ) + + +def downgrade() -> None: + op.drop_column("agents", "is_paused") diff --git a/alembic/versions/0018_nullable_criterion_scores.py b/alembic/versions/0018_nullable_criterion_scores.py new file mode 100644 index 0000000..b9891fc --- /dev/null +++ b/alembic/versions/0018_nullable_criterion_scores.py @@ -0,0 +1,34 @@ +"""Make categorical criterion scores nullable in pi_proposal_evaluations + +Revision ID: 0018 +Revises: 0017 +Create Date: 2026-05-06 00:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "0018" +down_revision: Union[str, None] = "0017" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +_COLS = [ + "score_significance", + "score_innovation", + "score_approach", + "score_investigators", + "score_environment", +] + + +def upgrade() -> None: + for col in _COLS: + op.alter_column("pi_proposal_evaluations", col, nullable=True) + + +def downgrade() -> None: + for col in _COLS: + op.alter_column("pi_proposal_evaluations", col, nullable=False) diff --git a/alembic/versions/0019_add_hidden_to_proposals.py b/alembic/versions/0019_add_hidden_to_proposals.py new file mode 100644 index 0000000..ed1da64 --- /dev/null +++ b/alembic/versions/0019_add_hidden_to_proposals.py @@ -0,0 +1,32 @@ 
+"""Add hidden column to thread_decisions and matchmaker_proposals + +Revision ID: 0019 +Revises: 0018 +Create Date: 2026-05-13 00:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "0019" +down_revision: Union[str, None] = "0018" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "thread_decisions", + sa.Column("hidden", sa.Boolean(), nullable=False, server_default=sa.text("false")), + ) + op.add_column( + "matchmaker_proposals", + sa.Column("hidden", sa.Boolean(), nullable=False, server_default=sa.text("false")), + ) + + +def downgrade() -> None: + op.drop_column("thread_decisions", "hidden") + op.drop_column("matchmaker_proposals", "hidden") diff --git a/alembic/versions/0020_nullable_overall_comment.py b/alembic/versions/0020_nullable_overall_comment.py new file mode 100644 index 0000000..7525d7c --- /dev/null +++ b/alembic/versions/0020_nullable_overall_comment.py @@ -0,0 +1,24 @@ +"""Make comments_overall nullable in pi_proposal_evaluations + +Revision ID: 0020 +Revises: 0019 +Create Date: 2026-05-13 00:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "0020" +down_revision: Union[str, None] = "0019" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.alter_column("pi_proposal_evaluations", "comments_overall", nullable=True) + + +def downgrade() -> None: + op.alter_column("pi_proposal_evaluations", "comments_overall", nullable=False) diff --git a/alembic/versions/0021_nullable_llm_call_log_simulation_run.py b/alembic/versions/0021_nullable_llm_call_log_simulation_run.py new file mode 100644 index 0000000..c075e31 --- /dev/null +++ b/alembic/versions/0021_nullable_llm_call_log_simulation_run.py @@ -0,0 +1,24 @@ +"""Make 
simulation_run_id nullable in llm_call_logs to support podcast pipeline logging + +Revision ID: 0021 +Revises: 0020 +Create Date: 2026-05-21 00:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "0021" +down_revision: Union[str, None] = "0020" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.alter_column("llm_call_logs", "simulation_run_id", nullable=True) + + +def downgrade() -> None: + op.alter_column("llm_call_logs", "simulation_run_id", nullable=False) diff --git a/alembic/versions/0022_add_podcast_enabled.py b/alembic/versions/0022_add_podcast_enabled.py new file mode 100644 index 0000000..fa16737 --- /dev/null +++ b/alembic/versions/0022_add_podcast_enabled.py @@ -0,0 +1,27 @@ +"""Add podcast_enabled flag to podcast_preferences (default false — users opt in) + +Revision ID: 0022 +Revises: 0021 +Create Date: 2026-05-21 00:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "0022" +down_revision: Union[str, None] = "0021" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "podcast_preferences", + sa.Column("podcast_enabled", sa.Boolean(), nullable=False, server_default="false"), + ) + + +def downgrade() -> None: + op.drop_column("podcast_preferences", "podcast_enabled") diff --git a/code_review.md b/code_review.md new file mode 100644 index 0000000..fbf0a1d --- /dev/null +++ b/code_review.md @@ -0,0 +1,290 @@ +# Code Review: Top 5 Priority Issues + +Reviewed: 2026-04-14 +Branch: `coPI-podcast` + +--- + +## Issue 1 — CSRF Bypass on Expired OAuth Session + +**File:** `src/routers/auth.py:76-79` +**Severity:** High (security) + +### Current Code + +```python +stored_state = request.session.pop("oauth_state", None) +if 
stored_state and state != stored_state: + logger.warning("OAuth state mismatch") + return RedirectResponse(url="/login?error=state_mismatch", status_code=302) +``` + +### Problem + +The guard condition is `if stored_state and ...`, meaning it only enforces the check when `stored_state` is truthy. If the user's session has expired (or was never set), `stored_state` is `None` and the entire check is skipped — any `state` value (including `None`) passes through. A CSRF attacker can initiate an OAuth flow, let the victim's session expire, then replay the callback with an arbitrary code. + +### Best Practice + +Per [RFC 6749 §10.12](https://datatracker.ietf.org/doc/html/rfc6749#section-10.12) and OWASP OAuth guidelines, the `state` parameter must be treated as a **required, non-nullable nonce**. The correct pattern is to reject the callback if `stored_state` is missing (session expired), not to treat it as a pass condition. + +### How to Fix + +Change the condition from a two-branch `if stored_state and ...` guard to an explicit three-case rejection: + +```python +stored_state = request.session.pop("oauth_state", None) + +if stored_state is None: + # Session expired before the callback arrived — cannot verify CSRF nonce + logger.warning("OAuth callback with no stored state (session expired or missing)") + return RedirectResponse(url="/login?error=session_expired", status_code=302) + +if state != stored_state: + logger.warning("OAuth state mismatch — possible CSRF attempt") + return RedirectResponse(url="/login?error=state_mismatch", status_code=302) +``` + +Also ensure the state nonce is generated with sufficient entropy. In `src/routers/auth.py` (in the `/login` route that initiates the flow), use `secrets.token_urlsafe(32)` rather than any shorter or predictable token, and store it in the session immediately before the redirect. 
+ +--- + +## Issue 2 — Budget Enforcement Exits the Entire Simulation Loop + +**File:** `src/agent/simulation.py:218-222` +**Severity:** Medium (reliability / correctness) + +### Current Code + +```python +agent = self._select_agent() +if not agent or not self._agent_within_budget(agent): + # All agents over budget + logger.info("All agents over budget or no agent selected. Stopping.") + break +``` + +### Problem + +`_select_agent()` returns whichever agent is next in the rotation. If that specific agent is over budget, the entire simulation `break`s — even if every other agent still has budget remaining. The log comment says "All agents over budget" but that is only true in the case where `_select_agent` returns `None`; when it returns an agent that is individually over budget, the others are never checked. + +### Best Practice + +Budget exhaustion for a single agent should be a **skip**, not a **halt**. The loop should continue cycling through agents until every agent is either over budget or no agent can be selected at all. A common pattern is to track how many consecutive agents have been skipped and stop only when the skip count equals the total number of agents. + +### How to Fix + +Separate the two exit conditions and convert the over-budget case from `break` to `continue`. Count consecutive over-budget skips and only exit the loop when all agents have been skipped in a single pass: + +```python +over_budget_streak = 0 +total_agents = len(self._agents) + +while True: + agent = self._select_agent() + if not agent: + logger.info("No agent selected — simulation complete.") + break + + if not self._agent_within_budget(agent): + over_budget_streak += 1 + agent.state.last_selected = time.time() + if over_budget_streak >= total_agents: + logger.info("All agents over budget. Stopping.") + break + logger.debug("[%s] Over budget, skipping.", agent.agent_id) + continue + + over_budget_streak = 0 # reset when a valid agent is found + # ... 
rest of the turn logic +``` + +This requires that `_select_agent` rotates through agents based on `last_selected` time (which it already does), so agents that have been skipped will be picked up again on the next cycle. + +--- + +## Issue 3 — RSS Feed Served with Missing Audio File + +**File:** `src/podcast/main.py:89-103`, `src/podcast/pipeline.py` +**Severity:** Medium (reliability) + +### Current Code + +```python +try: + ok = await run_pipeline_for_agent( + agent_id=agent_id, + ... + ) + if ok: + produced.append(agent_id) +except Exception as exc: + logger.error( + "Pipeline failed for agent %s: %s", agent_id, exc, exc_info=True + ) +``` + +### Problem + +`run_pipeline_for_agent` returns a boolean `ok`, but within the pipeline itself the episode DB record and RSS entry can be written before the TTS step completes. If TTS fails, the audio file does not exist, but the feed already contains an `<enclosure>` pointing to a non-existent MP3. Any podcast client that subscribed to the feed will attempt a GET on a 404 URL and may display a broken episode permanently. + +### Best Practice + +The pipeline should follow a **commit-last** pattern: write the episode record and RSS enclosure only after all assets are confirmed present on disk. This is the same pattern used in video/audio platforms (e.g., YouTube's upload pipeline) — metadata is published only after the binary asset is available. + +### How to Fix + +Inside `src/podcast/pipeline.py`, restructure the steps in this order: + +1. Fetch and select the paper (read-only, safe to do first). +2. Generate the text brief (Claude Opus call). +3. Call TTS and write the audio file to disk. **Capture the returned path.** +4. Verify the audio file exists and has a non-zero size (`path.stat().st_size > 0`) before proceeding. +5. Only if step 4 passes: write the `PodcastEpisode` DB row and call `db_session.flush()`. +6. Only after the DB row is committed: build and write the RSS `<item>`. 
+ +If TTS fails at step 3, log the error and return `ok=False` without writing anything to the DB or RSS. The caller in `main.py` already handles `ok=False` correctly; the gap is in the pipeline not propagating TTS failures as `False`. + +As a secondary safeguard, the RSS endpoint (`/podcast/{agent_id}/feed.xml`) should check whether `data/podcast_audio/{agent_id}/{date}.mp3` exists before including the `<item>` element in its output. This prevents any historical DB rows with missing audio from appearing in the feed. + +--- + +## Issue 4 — Non-Atomic File Writes for Profile and Podcast State + +**Files:** `src/agent/agent.py:423-444`, `src/podcast/state.py:22-24` +**Severity:** Medium (data integrity) + +### Current Code + +```python +# agent.py +memory_path.write_text(new_memory + "\n", encoding="utf-8") + +# state.py +def _save(data: dict) -> None: + STATE_FILE.parent.mkdir(parents=True, exist_ok=True) + STATE_FILE.write_text(json.dumps(data, indent=2), encoding="utf-8") +``` + +### Problem + +`Path.write_text` is not atomic — it opens the file for truncation and writes in multiple OS-level operations. If the process crashes, is killed, or two coroutines call the write concurrently, the file can be left in a partially written state (empty, or with truncated JSON). For `podcast_state.json`, this means the `delivered_pmids` list can be lost, causing duplicate Slack DMs. For working memory files, a partial write silently discards the agent's accumulated context. + +There is also a logical race: the callers of `_save` in `state.py` perform a read-modify-write cycle (`_load()` → modify → `_save()`). Two concurrent podcast pipeline runs (possible if the scheduler is invoked twice) will both read the same initial state, both modify it independently, and whichever writes last will silently overwrite the other's changes. + +### Best Practice + +The standard pattern for atomic file writes on POSIX systems is **write to a temp file, then `os.rename`**. 
Because `rename` is guaranteed atomic by the POSIX spec (it is a single syscall), a reader will always see either the old complete file or the new complete file — never a partial write. Python's `tempfile.NamedTemporaryFile` with `delete=False` in the same directory is the standard way to achieve this. + +For the read-modify-write race in `state.py`, use a `threading.Lock` (or `asyncio.Lock` if the callers are async) as a process-level mutex around all load/save operations. + +### How to Fix + +**Atomic write helper** (can live in `src/utils.py` or inline in each module): + +```python +import os +import tempfile +from pathlib import Path + +def atomic_write_text(path: Path, content: str, encoding: str = "utf-8") -> None: + """Write `content` to `path` atomically using a temp-file + rename.""" + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp = tempfile.mkstemp(dir=path.parent, suffix=".tmp") + try: + with os.fdopen(fd, "w", encoding=encoding) as f: + f.write(content) + os.replace(tmp, path) # atomic on POSIX; overwrites destination + except Exception: + os.unlink(tmp) # clean up temp file on any error + raise +``` + +Replace each `path.write_text(...)` call in `agent.py` (lines 428 and 441) and `state.py` (line 24) with `atomic_write_text(path, content)`. + +**Lock for state.py read-modify-write:** + +```python +import threading +_STATE_LOCK = threading.Lock() + +def record_delivery(agent_id: str, pmid: str) -> None: + with _STATE_LOCK: + data = _load() + # ... modify ... + _save(data) # now uses atomic_write_text internally + +def mark_run_complete() -> None: + with _STATE_LOCK: + data = _load() + data["last_run_date"] = ... + _save(data) +``` + +**Note:** if these functions are ever called from async context across multiple event-loop threads (e.g., concurrent `run_pipeline_for_agent` calls), a `threading.Lock` is sufficient because `asyncio.run` uses a single thread per call. 
If concurrency is ever introduced via `asyncio.gather`, switch to `asyncio.Lock`. + +--- + +## Issue 5 — Per-Task Failures Silently Discarded in `asyncio.gather` + +**File:** `src/agent/simulation.py:632-637` +**Severity:** Low-Medium (observability / silent failure) + +### Current Code + +```python +tasks = [ + self._reply_to_thread(agent, thread) + for thread in threads_to_reply +] +await asyncio.gather(*tasks, return_exceptions=True) +``` + +### Problem + +`return_exceptions=True` causes `asyncio.gather` to return exceptions as result values instead of re-raising them. The return value here is discarded entirely, so any exceptions from individual `_reply_to_thread` calls are silently swallowed. If a Slack API error, DB write failure, or Claude API timeout occurs in any thread reply, it is invisible in logs and metrics. Operators have no signal that Phase 4 is partially or fully failing. + +### Best Practice + +When using `return_exceptions=True` the caller **must** inspect the results. The canonical pattern is to iterate the results list and log (or re-raise) any values that are `isinstance(r, BaseException)`. This is preferable to removing `return_exceptions=True` (which would cancel all remaining tasks on the first failure) because Phase 4 replies are independent — a failure on one thread should not prevent replies to others. + +### How to Fix + +Capture the return value of `asyncio.gather` and inspect each result: + +```python +results = await asyncio.gather(*tasks, return_exceptions=True) + +for thread, result in zip(threads_to_reply, results): + if isinstance(result, BaseException): + logger.error( + "[%s] Phase 4: Failed to reply to thread %s: %s", + agent.agent_id, + thread.thread_id, + result, + exc_info=result, # includes traceback in log record + ) +``` + +This pattern is appropriate anywhere `asyncio.gather(..., return_exceptions=True)` is used without inspecting results. 
There is a similar call site in `src/agent/simulation.py` for channel scanning — apply the same pattern there. Consider extracting a small helper: + +```python +async def gather_logged(tasks: list, label: str) -> list: + """gather with return_exceptions=True, logging each failure.""" + results = await asyncio.gather(*tasks, return_exceptions=True) + for i, r in enumerate(results): + if isinstance(r, BaseException): + logger.error("%s task[%d] failed: %s", label, i, r, exc_info=r) + return results +``` + +--- + +## Summary Table + +| # | File | Line(s) | Severity | Category | +|---|------|---------|----------|----------| +| 1 | `src/routers/auth.py` | 76-79 | High | Security — CSRF bypass | +| 2 | `src/agent/simulation.py` | 218-222 | Medium | Correctness — premature loop exit | +| 3 | `src/podcast/pipeline.py` + `main.py` | pipeline write order | Medium | Reliability — broken RSS enclosure | +| 4 | `src/agent/agent.py` + `src/podcast/state.py` | 428, 441, 22-24 | Medium | Data integrity — non-atomic writes | +| 5 | `src/agent/simulation.py` | 637 | Low-Medium | Observability — silent task failures | diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 44dc726..8dab0db 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -35,6 +35,7 @@ services: volumes: - ./profiles:/app/profiles - ./prompts:/app/prompts + - podcast_data:/app/data depends_on: postgres: condition: service_healthy @@ -83,7 +84,6 @@ services: volumes: - ./profiles:/app/profiles - ./prompts:/app/prompts - - ./data:/app/data depends_on: postgres: condition: service_healthy @@ -108,7 +108,7 @@ services: volumes: - ./profiles:/app/profiles - ./prompts:/app/prompts - - ./data:/app/data + - grantbot_data:/app/data depends_on: postgres: condition: service_healthy @@ -120,6 +120,29 @@ services: awslogs-create-group: "true" awslogs-region: ${AWS_REGION:-us-east-2} + podcast: + build: + context: . 
+ restart: unless-stopped + command: ["python", "-m", "src.podcast.main", "scheduler"] + env_file: .env + environment: + DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-copi}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-copi} + volumes: + - ./profiles:/app/profiles + - ./prompts:/app/prompts + - podcast_data:/app/data + depends_on: + postgres: + condition: service_healthy + logging: + driver: awslogs + options: + awslogs-group: /copi/podcast + tag: podcast + awslogs-create-group: "true" + awslogs-region: ${AWS_REGION:-us-east-2} + nginx: image: nginx:1.27-alpine restart: unless-stopped @@ -167,3 +190,5 @@ services: volumes: pgdata: + grantbot_data: + podcast_data: diff --git a/docker-compose.yml b/docker-compose.yml index d686043..115bdea 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,6 +25,7 @@ services: - .:/app - ./profiles:/app/profiles - ./prompts:/app/prompts + - ./data:/app/data depends_on: postgres: condition: service_healthy @@ -69,5 +70,27 @@ services: postgres: condition: service_healthy + podcast: + build: . + command: python -m src.podcast.main scheduler + env_file: .env + environment: + # Override LOCAL_TTS_HOST so the container can reach a vLLM-Omni server + # running on the host machine (127.0.0.1 does not reach the host from inside Docker). + LOCAL_TTS_HOST: host.docker.internal + extra_hosts: + # Ensures host.docker.internal resolves on Linux (Docker Desktop sets it automatically on Mac/Windows). + - "host.docker.internal:host-gateway" + volumes: + - .:/app + - ./profiles:/app/profiles + - ./prompts:/app/prompts + - ./data:/app/data + depends_on: + postgres: + condition: service_healthy + profiles: + - podcast + volumes: pgdata: diff --git a/docs/cohort-infographic.html b/docs/cohort-infographic.html new file mode 100644 index 0000000..e3b84b5 --- /dev/null +++ b/docs/cohort-infographic.html @@ -0,0 +1,729 @@ + + + + + +coPI Agent System — Turn Loop & Cohort Scale + + + + + +
+
+

coPI Agent System

+

Turn-based conversation loop & cohort-based scale-up architecture

+
+
+ specs/cohort-system.md + specs/agent-system.md +
+
+ + +
+ 1 + Turn-Based Conversation Loop +
+ +
+ + +
+
+ 5-Phase Agent Turn + + per selected agent +
+ +
+
+
+
Phase 1
+
Channel Discovery
+
Keyword-match profile → join topic channels
+
No LLM
+
+
+
+
+
+
Phase 2
+
Scan & Filter
+
Evaluate new posts for relevance
+
Sonnet
+
+
+
+
+
+
Phase 3
+
Activate Threads
+
Open threads from tags & replies
+
No LLM
+
+
+
+
+
+
Phase 4
+
Reply Threads
+
Reply to all active threads (parallel)
+
Opus ×N
+
+
+
+
+
+
Phase 5
+
New Post
+
Start new conversation (conditional)
+
Opus ×1
+
+
+
+ +
+ + +
+ Thread Lifecycle (Phase 4) + + max 12 messages +
+ +
+
EXPLORE
+
+
1
+
2
+
3
+
4
+
+
Share work, ask questions, use retrieve_* tools
+
+
+
DECIDE
+
+
5
+
6
+
+
11
+
+
Narrow to genuine complementarity, build toward proposal
+
+
+
CONCLUDE
+
+
12
+
+
+
📝 Proposal
+
or
+
⏸ No overlap
+
+
+ +
+ Proposal confirmed when partner replies to the 📝 Summary block. + PI rates 0–5 in web app; rating = 0 reopens thread with PI guidance. +
+
+ + +
+
+ Agent Selection — Min-Heap + + O(log n) +
+ +
+ Agents are sorted by last_selected timestamp. The longest-waiting + eligible agents win the next slots. Replaces weighted-random to + guarantee no starvation at scale. +
+ + +
+
+
WisemanBot
+
+
+ waited longest +
+
+
← next
+
+
+
LotzBot
+
+
+ eligible +
+
+
← next
+
+
+
SuBot
+
+
+ eligible +
+
+
← next
+
+
+
RackiBot
+
+
+ on cooldown +
+
+
cooling
+
+
+
CravattBot
+
+
+ just ran +
+
+
cooling
+
+
+ +
+ + +
+ Concurrent Turn Slots + + concurrent_turns = active_thread_threshold +
+ +
+
+
Slot 1
+
WisemanBot
+
Phase 4 · Opus
+
+
+
Slot 2
+
LotzBot
+
Phase 2 · Sonnet
+
+
+
Slot 3
+
SuBot
+
Phase 5 · Opus
+
+
+ +
+ turn_delay_seconds repurposed: + was a global asyncio.sleep blocking all slots after each turn. + Now enforced as a per-agent cooldown at heap-build time — + (now − last_selected) ≥ turn_delay_seconds. + All other slots stay active during any agent's cooldown. +
+
+ +
+ + +
+ 2 + Cohort-Based Scale-Up +
+ +
+ + +
+
+ From All-vs-All → Cohort-Scoped +
+ +
+ +
+
❌ All-vs-All (current)
+
+ Every agent scans posts from all others.
+ LLM calls scale as O(n²). +
+
+
A
+
B
+
C
+
D
+
E
+
F
+
+
6 agents → 15 possible pairs
all evaluated every scan
+
+ + +
+
✓ With Cohorts
+
+ Agents only act on cohort-mates.
+ Scan cost bounded by cohort size. +
+
+
+
Cohort 1
+
+
A
+
B
+
C
+
+
+
+
Cohort 2
+
+
B
+
D
+
E
+
F
+
+
+
+
B spans both cohorts · memberships
change live · no restart needed
+
+
+ +
+ + +
+ Data Model + + migration 0023 +
+ +
+
+
cohorts
+
id uuid pk
+
name text unique
+
description text?
+
created_by → users
+
created_at timestamptz
+
+
+
cohort_memberships
+
id uuid pk
+
cohort_id → cohorts ⬆
+
agent_id text
+
added_by → users
+
added_at timestamptz
+
+
+
+ + +
+ +
+
+ Interaction Gate + + pure filter +
+ +
+ def can_interact(self, other) → bool:
+   # uncohorted → interact with all
+   if not self.cohort_ids or not other.cohort_ids:
+     return True
+   return bool(self.cohort_ids & other.cohort_ids) +
+ +
+
+
🔍
+
+ Phase 2 — filter posts before LLM prompt
+ Saves Sonnet calls on non-cohort senders +
+
+
+
+
🔗
+
+ Phase 3 — skip thread activation from non-cohort agents
+ No state bloat from irrelevant replies +
+
+
+
+
📢
+
+ Phase 5 — block tagging non-cohort agents
+ Saves Opus call; prunes stale interesting_posts +
+
+
+ +
+ Phase 4 requires no gate — threads already exist between cohort-mates when opened. + Scheduling is cohort-unaware; cohorts only affect what happens inside a turn. +
+
+ +
+
+ Admin UI + + /admin/cohorts +
+ +
+
+ GET + /admin/cohorts + list + create form +
+
+ POST + /admin/cohorts/create + admin only +
+
+ GET + /admin/cohorts/{id} + members + map +
+
+ POST + …/{id}/add-agent + live, no restart +
+
+ POST + …/{id}/remove-agent + open threads unaffected +
+
+ POST + …/{id}/delete + requires 0 members +
+
+ +
+ Membership resyncs every 60 s mid-run. Resync rebuilds + agent.cohort_ids only — no state wipe, no thread closure. +
+
+ +
+
+ + + + + + diff --git a/orcids.txt b/orcids.txt index c8caf66..d8e7d97 100644 --- a/orcids.txt +++ b/orcids.txt @@ -30,3 +30,83 @@ 0000-0003-2819-4049 # Luke Lairson 0000-0001-6701-996X +# Ahmed Badran +0000-0002-8105-1883 +# Peng Wu +0000-0002-5204-0229 +# Keren Lasker +0000-0002-5480-4173 +# Dorothee Kern +0000-0002-7631-8328 +# Marco Mravic +0000-0001-6294-1824 +# Ian MacRae +0000-0002-5112-0294 +# James Paulson +0000-0003-4589-5322 +# Giordano Lippi +0000-0003-3911-0525 +# Ian Wilson +0000-0002-6469-2419 +# Ian Seiple +0000-0002-8732-1362 +# Shannon Miller +0000-0003-3569-6231 +# James Williamson +0000-0002-8772-468X +# Colleen Maillie +0000-0001-7050-4464 + +# Cabo retreat additions — 2026-05-01 +# Scripps Research +# David Millar +0000-0001-9207-6958 +# UCSF +# Andrej Sali +0000-0003-0435-6197 +# Carolyn Larabell +0000-0002-6262-4789 +# Balyn Zaro +0000-0002-8938-9889 +# Leah Roe +0000-0002-2487-5587 +# Daniel Santi +0000-0002-3790-0673 +# James Wells +0000-0001-8267-5519 +# Ignacia Echeverria +0000-0003-4717-1467 +# James Fraser +0000-0002-5080-2859 +# Charles Craik +0000-0001-7704-9185 +# Robert Stroud +0000-0003-2083-5665 +# Daniel Minor Jr. +0000-0002-5998-4214 +# Aashish Manglik +0000-0002-7173-3741 +# Katherine Susa +0000-0003-0077-667X +# Tony Capra (John A. Capra) +0000-0001-9743-1795 +# New PIs — 2026-05-01 +# Stanford +# Peter S. Kim +0000-0001-6503-4541 +# Genentech +# Caleigh Azumaya +0000-0002-3484-9921 +# UC Berkeley +# Daniel K. 
Nomura +0000-0003-1614-8360 +# Mark Yeager +0000-0002-3301-640X +# Jonathan Moore +0000-0001-8633-3313 +# Michael Williams +0009-0002-3422-6713 +# Mohammad Alanjary +0000-0001-8420-1325 +# Hollis Cline +0000-0002-4887-9603 diff --git a/pipairs.tsv b/pipairs.tsv new file mode 100644 index 0000000..958add5 --- /dev/null +++ b/pipairs.tsv @@ -0,0 +1,41 @@ +Wiseman Lotz +Deniz Lairson +Deniz Petrascheck +Lotz Cravatt +Petrascheck Grotjahn +Wu Lairson +Forli Su +Forli Ward +Deniz Su +Briney Su +Racki Ken +Racki Su +Racki Lairson +Grotjahn Wiseman +Racki Grotjahn +Cravatt Wiseman +Cravatt Su +Forli Lairson +Saez Wiseman +Deniz Racki +Saez Petrascheck +Deniz Ken +Saez Grotjahn +Deniz Wiseman +Ken Lairson +Briney Ward +Grotjahn Lairson +Cravatt Lairson +Su Petrascheck +Deniz Grotjahn +Saez Lairson +Lotz Wu +Forli Petrascheck +Cravatt Ken +Briney Ward +Cravatt Forli +Saez Su +Su Wiseman +Forli Ken +Wu Su +Wiseman Briney diff --git a/prompts/agent-system.md b/prompts/agent-system.md index 37f033b..1aecf97 100644 --- a/prompts/agent-system.md +++ b/prompts/agent-system.md @@ -1,9 +1,10 @@ # Agent System Prompt -You are an AI agent representing a research lab at Scripps Research in a Slack workspace called "labbot". +You are an AI agent representing a research lab at Scripps Research in a Slack workspace. Your role is to facilitate scientific collaboration by engaging authentically with other lab agents. All agents represent real labs with real researchers — your goal is to identify genuinely valuable collaboration opportunities, not to generate noise. +Your task is to produce a high-quality collaboration proposal that follows the Proposal Generation Rules and meets the listed quality standards by engaging in dialogue between agents. You have access to each PI's public profile associated with the user (or profiles in profiles/public), private instructions (profiles in profiles/private), and recent relevant publications. 
Use all of this to initiate conversations with the ultimate goal of generating a specific, grounded, and actionable proposal after sufficient discussion. ## Core Rules @@ -19,84 +20,9 @@ collaboration opportunities, not to generate noise. 4. **DM rules.** You may DM your own PI to report on discussions or ask for guidance. You cannot DM other labs' PIs or send agent-to-agent DMs. -## Collaboration Quality Standards +## Proposal Generation Rules -These standards apply to every collaboration idea you propose or explore. Your PI's private instructions -may adjust these defaults — always follow PI instructions when they conflict. - -### Core Principles - -1. **Specificity.** Every collaboration idea must name specific techniques, models, reagents, datasets, - or expertise from each lab's profile. "Lab A's expertise in X" is not enough — say what specifically - they would do and with what. - -2. **True complementarity.** Each lab must bring something the other doesn't have. If either lab's - contribution could be described as a generic service (e.g., "computational analysis", "structural studies", - "mouse behavioral testing") without reference to the specific scientific question, the idea is too generic. - -3. **Concrete first experiment.** Any collaboration that advances beyond initial interest must include - a proposed first experiment scoped to days-to-weeks of effort. The experiment must name specific assays, - computational methods, reagents, or datasets. "We would analyze the data" is not a first experiment. - -4. **Silence over noise.** If you cannot articulate what makes this collaboration better than either lab - hiring a postdoc to do the other's part, do not propose it. - -5. **Non-generic benefits.** Both labs must benefit in ways specific to the collaboration. "Access to - new techniques" is too vague. 
"Structural evidence for the mechanism of mitochondrial rescue at - nanometer resolution, strengthening the therapeutic narrative for HRI activators" is specific. - -### Confidence Labels - -When you propose a collaboration, label your confidence level: -- *[High]* — Clear complementarity, specific anchoring to recent work, concrete first experiment, - both sides benefit non-generically -- *[Moderate]* — Good synergy but first experiment is less defined, or one side's benefit is less clear -- *[Speculative]* — Interesting angle but requires more development — use "This is speculative, but..." - -### Examples of Good Collaboration Ideas - -**Good: Specific question, specific contributions, concrete experiment** -> Wiseman's HRI activators induce mitochondrial elongation in MFN2-deficient cells, but the ultrastructural -> basis is unknown. Grotjahn's cryo-ET and Surface Morphometrics pipeline could directly visualize this -> remodeling at nanometer resolution. First experiment: Wiseman provides treated vs untreated MFN2-deficient -> fibroblasts, Grotjahn runs cryo-FIB-SEM and cryo-ET on both conditions, quantifying cristae morphology -> and membrane contact site metrics. - -**Good: Each lab has something the other literally cannot do alone** -> Petrascheck's atypical tetracyclines provide neuroprotection via ISR-independent ribosome targeting. -> Wiseman's HRI activators work through ISR-dependent pathways. Neither lab can test the combination alone. -> First experiment: mix compounds in neuronal ferroptosis assays, measure survival, calculate combination -> indices for synergy. - -**Good: Computational contribution is specific, not generic** -> Lotz's JCI paper identified cyproheptadine as an H1R inverse agonist activating FoxO in chondrocytes, -> but the structural basis for FoxO activation vs antihistamine activity is unknown. Su's BioThings -> knowledge graph could identify additional H1R ligands with FoxO activity data across multiple -> orthogonal datasets. 
First experiment: Lotz provides 10-15 H1R ligands with FoxO activity data, -> Su runs BioThings traversal to identify structural and mechanistic correlates from published datasets. - -### Examples of Bad Collaboration Ideas (do not propose these) - -**Bad: Descriptive imaging without leverage** -> "Grotjahn could use cryo-ET to visualize disc matrix degeneration in Lotz samples." — This may -> generate interesting images, but it is mostly descriptive. It does not clearly unlock a mechanistic -> bottleneck, therapeutic decision, or scalable downstream program. - -**Bad: Mechanistic depth without an intervention path** -> "A chromatin-focused collaboration could add mechanistic depth to disc regeneration work." — This -> sounds sophisticated, but it is not tied to a clear intervention strategy or near-term decision. - -**Bad: Incremental validation of an already-supported pathway** -> "Petrascheck could test the FoxO-H1R pathway in C. elegans aging assays." — Orthogonal validation -> alone is not enough if it only incrementally confirms a pathway that is already fairly well supported. - -**Bad: Generic screening in an overused model** -> "Run a high-throughput screen for FoxO activators in a C. elegans aging model." — A screen is not -> automatically compelling if the assay class is overused and the proposal lacks a distinctive hypothesis. - -**Bad: Novel but still low-leverage imaging** -> "Use cryo-ET to compare the chondrocyte-matrix interface in OA versus control samples." — Novelty -> and visual appeal are not sufficient without mechanistic or translational leverage. 
+{{include: colab-proposal-rules.md}} ## Communication Style @@ -163,13 +89,7 @@ Every thread must reach one of two outcomes: **Outcome 1: Collaboration Proposal** (rare — only the best ideas) -Post a `:memo: Summary` reply containing: -- **What each lab brings** (specific techniques, reagents, datasets — not generic capabilities) -- **The specific scientific question** being addressed -- **A concrete first experiment** scoped to days-to-weeks, naming specific assays/methods/reagents, - requiring modest effort from both sides -- **Why this collaboration is better** than either lab doing it independently -- **Confidence label** ([High], [Moderate], or [Speculative]) +Generate a proposal conforming to the "Proposal Generation Rules" and output format The other agent confirms agreement by replying with ✅. diff --git a/prompts/colab-proposal-rules.md b/prompts/colab-proposal-rules.md new file mode 100644 index 0000000..20e6a4b --- /dev/null +++ b/prompts/colab-proposal-rules.md @@ -0,0 +1,125 @@ +## Collaboration Quality Standards + +These standards apply to every collaboration proposal. PI private instructions may adjust these +defaults — always follow PI instructions when they conflict. + +### Core Principles + +1. **Specificity.** Every collaboration idea must name specific techniques, models, reagents, datasets, + or expertise from each lab's profile. "Lab A's expertise in X" is not enough — say what specifically + they would do and with what. + +2. **True complementarity.** Each lab must bring something the other doesn't have. If either lab's + contribution could be described as a generic service (e.g., "computational analysis", "structural + studies", "mouse behavioral testing") without reference to the specific scientific question, the + idea is too generic. + +3. **Concrete first experiment.** Any collaboration proposal must include a first experiment scoped + to days-to-weeks of effort. 
The experiment must name specific assays, computational methods, + reagents, or datasets. "We would analyze the data" is not a first experiment. + +4. **Silence over noise.** If you cannot articulate what makes this collaboration better than either + lab hiring a postdoc to do the other's part, do not propose it. + +5. **Non-generic benefits.** Both labs must benefit in ways specific to the collaboration. "Access to + new techniques" is too vague. "Structural evidence for the mechanism of mitochondrial rescue at + nanometer resolution, strengthening the therapeutic narrative for HRI activators" is specific. + +### Confidence Labels + +- **High** — Clear complementarity, specific anchoring to recent work, concrete first experiment, + both sides benefit non-generically +- **Moderate** — Good synergy but first experiment is less defined, or one side's benefit is less clear +- **Speculative** — Interesting angle but requires more development — label sections accordingly + +### Examples of Good Collaboration Ideas + +**Good: Specific question, specific contributions, concrete experiment** +> Wiseman's HRI activators induce mitochondrial elongation in MFN2-deficient cells, but the ultrastructural +> basis is unknown. Grotjahn's cryo-ET and Surface Morphometrics pipeline could directly visualize this +> remodeling at nanometer resolution. First experiment: Wiseman provides treated vs untreated MFN2-deficient +> fibroblasts, Grotjahn runs cryo-FIB-SEM and cryo-ET on both conditions, quantifying cristae morphology +> and membrane contact site metrics. + +**Good: Each lab has something the other literally cannot do alone** +> Petrascheck's atypical tetracyclines provide neuroprotection via ISR-independent ribosome targeting. +> Wiseman's HRI activators work through ISR-dependent pathways. Neither lab can test the combination alone. +> First experiment: mix compounds in neuronal ferroptosis assays, measure survival, calculate combination +> indices for synergy. 
+ +**Good: Computational contribution is specific, not generic** +> Lotz's JCI paper identified cyproheptadine as an H1R inverse agonist activating FoxO in chondrocytes, +> but the structural basis for FoxO activation vs antihistamine activity is unknown. Su's BioThings +> knowledge graph could identify additional H1R ligands with FoxO activity data across multiple +> orthogonal datasets. First experiment: Lotz provides 10–15 H1R ligands with FoxO activity data, +> Su runs BioThings traversal to identify structural and mechanistic correlates from published datasets. + +### Examples of Bad Collaboration Ideas + +**Bad: Descriptive imaging without leverage** +> "Grotjahn could use cryo-ET to visualize disc matrix degeneration in Lotz samples." — This may +> generate interesting images, but it is mostly descriptive. It does not clearly unlock a mechanistic +> bottleneck, therapeutic decision, or scalable downstream program. + +**Bad: Mechanistic depth without an intervention path** +> "A chromatin-focused collaboration could add mechanistic depth to disc regeneration work." — This +> sounds sophisticated, but it is not tied to a clear intervention strategy or near-term decision. + +**Bad: Incremental validation of an already-supported pathway** +> "Petrascheck could test the FoxO-H1R pathway in C. elegans aging assays." — Orthogonal validation +> alone is not enough if it only incrementally confirms a pathway that is already fairly well supported. + +**Bad: Generic screening in an overused model** +> "Run a high-throughput screen for FoxO activators in a C. elegans aging model." — A screen is not +> automatically compelling if the assay class is overused and the proposal lacks a distinctive hypothesis. + +**Bad: Novel but still low-leverage imaging** +> "Use cryo-ET to compare the chondrocyte-matrix interface in OA versus control samples." — Novelty +> and visual appeal are not sufficient without mechanistic or translational leverage. 
+ +--- + +## Instructions + +Produce ONE collaboration proposal between PI A and PI B using the output format below. + +- Apply the Collaboration Quality Standards strictly. +- Ground the proposal in specific publications, techniques, and findings from each profile. +- Respect each PI's private instructions when framing the proposal: if a PI has expressed preferences + for specific topics, partners, or collaboration styles, weight those angles positively. +- Do NOT quote or reveal any private instruction text verbatim in the output. +- If you cannot identify a High or Moderate confidence collaboration, produce the best Speculative + proposal you can and label it clearly. +- Wrap your entire proposal (and only the proposal) in `` tags. +- Do not include reasoning steps, tool call markup, or self-commentary before or after the `` block. In agent contexts, a Slack-formatted memo may precede the tag — all other preamble is forbidden. + +## Output Format + + +# [Collaboration Title — specific, not generic] +**[PI_A] + [PI_B]** [Timestamp] +**Confidence:** High | Moderate | Speculative + +## Scientific Rationale +[2–3 paragraphs. Why these two labs? What does each bring that the other lacks? Name specific +techniques, datasets, reagents, or model systems from recent publications.] + +## True Complementarity +- **PI A contributes:** [specific capabilities — not generic] +- **PI B contributes:** [specific capabilities — not generic] +- **Gap filled:** [what neither could do alone, stated precisely] + +## Concrete First Experiment +[1 paragraph. Scoped to days-to-weeks. Names specific assays, methods, reagents, or datasets. +Explains why both labs are essential to execute it.] 
+ +## Benefits to Each Lab +- **PI A benefits:** [specific, non-generic — tied to their research goals] +- **PI B benefits:** [specific, non-generic — tied to their research goals] + +## Open Questions / Next Steps +- [Bullet list of what would need to be confirmed before committing effort] + +## Effort / Resources +- [Estimate low/med/high for each PI in terms of time, people, and resources required to achieve the goals of the proposal] + \ No newline at end of file diff --git a/prompts/matchmaker.md b/prompts/matchmaker.md new file mode 100644 index 0000000..f9d21a8 --- /dev/null +++ b/prompts/matchmaker.md @@ -0,0 +1,9 @@ +You are evaluating a potential research collaboration between two PIs. + +Your task is to produce a high-quality collaboration proposal that follows the Proposal Generation Rules and meets the listed quality standards. You have access to each PI's public profile, private instructions, and recent publications. Use all of this to generate a specific, grounded, and actionable proposal. + +**Output only the `` block. Do not include any text before or after it — no reasoning steps, no preamble, no tool calls, no commentary.** + +## Proposal Generation Rules + +{{include: colab-proposal-rules.md}} \ No newline at end of file diff --git a/prompts/phase4-thread-reply.md b/prompts/phase4-thread-reply.md index a1de54a..79632ea 100644 --- a/prompts/phase4-thread-reply.md +++ b/prompts/phase4-thread-reply.md @@ -81,6 +81,10 @@ you should already have the information you need. {instructions} +## Proposal Generation Rules + +{{include: colab-proposal-rules.md}} + ## Output Your final response MUST contain exactly one `` block. Everything inside @@ -95,12 +99,7 @@ Your message here — written as it should appear in Slack. You may think/reason freely outside the block, but ONLY the content between `` and `` tags will be posted. 
-If you are posting a :memo: Summary (collaboration proposal), format it clearly with: -- What each lab brings -- The specific scientific question -- A concrete first experiment (days-to-weeks scope, specific assays/methods) -- Why this collaboration beats either lab working alone -- Confidence label: [High], [Moderate], or [Speculative] +If you are posting a :memo: Summary (collaboration proposal), format it clearly by conforming to the Proposal Generation Rules If you are confirming agreement with a :memo: Summary from the other agent, start your reply with ✅. This means you accept the proposal **exactly as written** — do not add diff --git a/prompts/podcast-select.md b/prompts/podcast-select.md new file mode 100644 index 0000000..121af03 --- /dev/null +++ b/prompts/podcast-select.md @@ -0,0 +1,46 @@ +You are a literature triage assistant for a scientific researcher. Your job is to identify the single most relevant and impactful recent paper from a list of candidates, based on the researcher's profile. + +## Researcher Profile + +{profile} + +## PI Podcast Preferences + +{preferences} + +## Task + +Below is a numbered list of recent publications (title + abstract). Select the ONE paper whose findings or outputs could most plausibly accelerate or inform a specific aspect of this researcher's ongoing work. 
+ +Return your answer as JSON: +```json +{"index": , "justification": ""} +``` + +If no paper clears the relevance bar, return: +```json +{"index": null, "justification": "No paper is sufficiently relevant to this researcher's current work."} +``` + +## Selection Criteria + +**INCLUDE** a paper if: +- Its findings or methods could directly accelerate a specific ongoing project, technique, or open question in the researcher's profile +- It releases a new tool, dataset, method, or reagent relevant to the researcher's techniques or targets +- It addresses a disease area, model system, or molecular target the researcher actively works on + +**EXCLUDE** a paper if: +- The connection to the researcher's work is only superficial or generic +- It is a review article, editorial, or commentary (no new primary data) +- It is purely clinical or epidemiological with no basic science relevance +- Recency alone makes it interesting — the connection must be specific and actionable + +**NOTE:** Some candidates are preprints (from bioRxiv, medRxiv, or arXiv) and are marked as such in the journal field. Preprints are valid candidates — treat them the same as peer-reviewed papers for selection purposes. + +**PREFER** papers that release a concrete output alongside findings (code, dataset, protocol, reagent, model). These tend to be immediately useful. + +**FOLLOW PI PREFERENCES:** If the PI Podcast Preferences section above contains specific instructions (e.g., topic focus, exclusions, prioritizations), apply them when selecting. PI preferences override the general criteria above. + +## Candidate Papers + +{candidates} diff --git a/prompts/podcast-summarize.md b/prompts/podcast-summarize.md new file mode 100644 index 0000000..1a96589 --- /dev/null +++ b/prompts/podcast-summarize.md @@ -0,0 +1,46 @@ +You are a science communicator writing a personalized research brief for a specific PI. Your goal is to help the PI quickly grasp whether and how a new paper is useful to their lab. 
+ +## Researcher Profile + +{profile} + +## PI Podcast Preferences + +{preferences} + +## Paper + +{paper} + +## Task + +Write a structured research brief following the exact format below. Be specific, direct, and concise — like a knowledgeable postdoc briefing their PI. No filler phrases, no generic connections. + +--- + +*Today's Research Brief — {date}* + +*{paper_title}* +{authors} · {journal} · {year} + +*What they found:* +[2–3 sentences on core findings. Include specific results, effect sizes, or key observations. Be concrete — name specific proteins, pathways, organisms, or quantitative outcomes where relevant.] + +*Key output:* +[1–2 sentences on the tool, method, dataset, code, protocol, or reagent released with the paper. ONLY include this section if the paper releases a concrete artifact. If there is no distinct output, omit this section entirely — do not write "N/A" or a placeholder.] + +*Why this matters for your lab:* +[2–3 sentences connecting the paper specifically to this PI's work. You MUST name at least one specific technique, model system, molecular target, or open question from the researcher's profile. Do not write generic connections like "this is relevant to your proteomics work" — say exactly what aspect and how.] + +*Link:* {paper_url} + +--- + +## Rules + +- Total length: approximately 200–280 words +- Tone: collegial and precise, not promotional +- The "Why this matters" section is the most important — make it specific to this researcher, not a general statement about the field +- If the PI Podcast Preferences section contains specific instructions on tone, focus, or framing, follow them +- If the abstract is all you have, base the brief on the abstract. Do not speculate about full-text content you weren't given. 
+- Do not add any text before or after the brief itself diff --git a/pyproject.toml b/pyproject.toml index d09fa83..6b780d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "boto3>=1.34.0", "typer>=0.12.0", "rich>=13.7.0", + "mutagen>=1.47.0", ] [project.optional-dependencies] diff --git a/scripts/fix_proposal_summaries.py b/scripts/fix_proposal_summaries.py new file mode 100644 index 0000000..264a1a9 --- /dev/null +++ b/scripts/fix_proposal_summaries.py @@ -0,0 +1,29 @@ +# scripts/fix_proposal_summaries.py +import asyncio, re +from sqlalchemy import select +from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession +from src.config import get_settings +from src.models import ThreadDecision + +async def fix(): + settings = get_settings() + engine = create_async_engine(settings.database_url) + factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) + + async with factory() as db: + result = await db.execute( + select(ThreadDecision).where(ThreadDecision.outcome == "proposal") + ) + decisions = result.scalars().all() + fixed = 0 + for d in decisions: + if not d.summary_text: + continue + match = re.search(r"(.*?)", d.summary_text, re.DOTALL) + if match: + d.summary_text = match.group(1).strip() + fixed += 1 + await db.commit() + print(f"Fixed {fixed} / {len(decisions)} proposals") + +asyncio.run(fix()) diff --git a/scripts/matchmaker_cli.py b/scripts/matchmaker_cli.py new file mode 100644 index 0000000..a8072ac --- /dev/null +++ b/scripts/matchmaker_cli.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +"""Generate a matchmaker collaboration proposal from two PI profile directories. 
+ +Usage (from repo root inside the app container): + + Single pair (positional args): + python scripts/matchmaker_cli.py [--dry-run] + + Batch from TSV file (-t flag, no positional args): + python scripts/matchmaker_cli.py -t pairs.tsv [--dry-run] + +The TSV file has two tab-separated columns (pi_a, pi_b), one pair per line. +Lines starting with '#' and blank lines are ignored. A header row whose first +cell is "pi_a" (case-insensitive) is also skipped automatically. + +Examples: + python scripts/matchmaker_cli.py su wiseman + python scripts/matchmaker_cli.py grotjahn lotz --dry-run + python scripts/matchmaker_cli.py -t pairs.tsv + python scripts/matchmaker_cli.py -t pairs.tsv --dry-run + +The PI slug must match a filename in profiles/public/ (without .md extension). +Private profiles from profiles/private/{slug}.md are included if they exist. + +Results are written to the matchmaker_proposals DB table and are immediately +visible in the admin Matchmaker tab at /admin/matchmaker. +""" + +import argparse +import asyncio +import re +import sys +from pathlib import Path + + +def load_profile_files(slug: str) -> tuple[str, str, str]: + """Load public + private profiles for a given slug. + + Returns (pi_name, public_md, private_md). + pi_name is extracted from the '**PI:**' line in the public profile. 
+ """ + slug = slug.lower() + public_path = Path("profiles/public") / f"{slug}.md" + private_path = Path("profiles/private") / f"{slug}.md" + + if not public_path.exists(): + available = sorted(p.stem for p in Path("profiles/public").glob("*.md")) + print(f"Error: no public profile found for '{slug}'.") + print(f"Available slugs: {', '.join(available)}") + sys.exit(1) + + public_md = public_path.read_text() + + # Extract PI name from "**PI:** Name" line + pi_name = slug.capitalize() + match = re.search(r"\*\*PI:\*\*\s*(.+)", public_md) + if match: + pi_name = match.group(1).strip() + + private_md = private_path.read_text() if private_path.exists() else "" + + return pi_name, public_md, private_md + + +async def run(slug_a: str, slug_b: str, dry_run: bool) -> None: + from src.config import get_settings + from src.services.llm import generate_matchmaker_proposal + + name_a, public_a, private_a = load_profile_files(slug_a) + name_b, public_b, private_b = load_profile_files(slug_b) + + settings = get_settings() + + print(f"Generating proposal: {name_a} × {name_b}") + print(f"Model: {settings.llm_agent_model_opus}") + print("Calling LLM… (this may take 10–20 seconds)") + + result = await generate_matchmaker_proposal( + name_a=name_a, + public_profile_a=public_a, + private_profile_a=private_a, + publications_a="(see public profile above)", + name_b=name_b, + public_profile_b=public_b, + private_profile_b=private_b, + publications_b="(see public profile above)", + model=settings.llm_agent_model_opus, + ) + + print(f"\nConfidence : {result['confidence'].upper()}") + print(f"Title : {result['title']}") + print(f"Tokens : {result['input_tokens']} in / {result['output_tokens']} out") + print("\n" + "─" * 72) + print(result["proposal_md"]) + print("─" * 72) + + if dry_run: + print("\n[dry-run] Skipping database write.") + return + + # Write to DB + import uuid + from datetime import datetime, timezone + + from sqlalchemy import text + + from src.database import get_engine, 
get_session_factory + from src.models.matchmaker import MatchmakerProposal + + engine = get_engine() + session_factory = get_session_factory() + + async with session_factory() as session: + proposal = MatchmakerProposal( + id=uuid.uuid4(), + pi_a_id=None, + pi_b_id=None, + pi_a_name=name_a, + pi_b_name=name_b, + proposal_md=result["proposal_md"], + title=result["title"], + confidence=result["confidence"], + llm_model=result["model"], + input_tokens=result["input_tokens"], + output_tokens=result["output_tokens"], + generated_at=datetime.now(timezone.utc), + ) + session.add(proposal) + await session.commit() + print(f"\nSaved to DB: {proposal.id}") + print(f"View at : /admin/matchmaker/{proposal.id}") + + await engine.dispose() + + +def _parse_tsv(path: str) -> list[tuple[str, str]]: + """Parse a two-column TSV file into a list of (pi_a, pi_b) slug pairs.""" + pairs: list[tuple[str, str]] = [] + with open(path) as f: + for lineno, line in enumerate(f, 1): + line = line.rstrip("\n") + if not line or line.startswith("#"): + continue + parts = line.split("\t") + if len(parts) < 2: + print(f"Warning: line {lineno} has fewer than 2 columns, skipping: {line!r}") + continue + a, b = parts[0].strip(), parts[1].strip() + if lineno == 1 and a.lower() == "pi_a": + continue # skip header row + if not a or not b: + print(f"Warning: line {lineno} has empty slug, skipping.") + continue + pairs.append((a, b)) + return pairs + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Generate matchmaker proposals from PI profile slugs.", + epilog=( + "Single pair: matchmaker_cli.py su wiseman\n" + "Batch TSV: matchmaker_cli.py -t pairs.tsv" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("pi_a", nargs="?", help="Slug for PI A (e.g. 'su')") + parser.add_argument("pi_b", nargs="?", help="Slug for PI B (e.g. 
'wiseman')") + parser.add_argument( + "-t", "--tsv", + metavar="FILE", + help="TSV file with two columns (pi_a, pi_b); one pair per line", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Print proposals to stdout without writing to the database", + ) + args = parser.parse_args() + + # Build list of pairs to process + if args.tsv: + if args.pi_a or args.pi_b: + parser.error("Cannot combine -t/--tsv with positional PI arguments.") + pairs = _parse_tsv(args.tsv) + if not pairs: + print("No valid pairs found in TSV file.") + sys.exit(1) + elif args.pi_a and args.pi_b: + pairs = [(args.pi_a, args.pi_b)] + else: + parser.error("Provide either two positional slugs or -t FILE.") + + errors: list[str] = [] + for i, (slug_a, slug_b) in enumerate(pairs): + if len(pairs) > 1: + print(f"\n{'='*72}") + print(f"Pair {i + 1}/{len(pairs)}: {slug_a} × {slug_b}") + print(f"{'='*72}") + if slug_a == slug_b: + msg = f"Skipping {slug_a} × {slug_b}: PI A and PI B must be different." + print(msg) + errors.append(msg) + continue + try: + asyncio.run(run(slug_a, slug_b, args.dry_run)) + except SystemExit: + errors.append(f"Failed: {slug_a} × {slug_b}") + + if errors: + print(f"\n{len(errors)} pair(s) failed:") + for e in errors: + print(f" {e}") + + +if __name__ == "__main__": + main() diff --git a/scripts/provision_slack_bots.py b/scripts/provision_slack_bots.py new file mode 100644 index 0000000..3e7e025 --- /dev/null +++ b/scripts/provision_slack_bots.py @@ -0,0 +1,780 @@ +#!/usr/bin/env python3 +""" +Provision Slack apps for all LabBots that don't yet have a bot token. + +How it works +------------ +1. Reads PILOT_LABS to find bots without SLACK_BOT_TOKEN_ in .env +2. Creates a Slack app for each via the Manifest API (apps.manifest.create) +3. Starts a local OAuth callback server on --port (default 8888) +4. Prints authorize URLs — a workspace admin clicks each one in a browser +5. Each click redirects back here; the code is exchanged for an xoxb- token +6. 
Tokens are appended to .env as SLACK_BOT_TOKEN_<AGENT_ID> + +Prerequisites (one-time, done by a workspace admin in a browser) +----------------------------------------------------------------- + 1. Go to https://api.slack.com/apps + 2. Click "Your App Configuration Tokens" → "Generate Token" for your workspace + 3. Copy both the token (xoxe-...) and the refresh token + 4. Add to .env: + SLACK_CONFIG_TOKEN=xoxe-... + SLACK_CONFIG_REFRESH_TOKEN=xoxe-... + +Usage +----- + # From project root: + python scripts/provision_slack_bots.py + + # Custom port or env file: + python scripts/provision_slack_bots.py --port 9000 --env-file .env + + # Preview what would be created without calling any APIs: + python scripts/provision_slack_bots.py --dry-run + + # Re-run the OAuth step without recreating apps (useful if the server was + # interrupted midway — re-uses credentials saved in .provision_state.json): + python scripts/provision_slack_bots.py --skip-create + +Remote / no-browser workflow +----------------------------- +Use this when you don't have browser access to the Slack workspace and need +to delegate the approval step to the workspace admin. + + Step 1 — on your server, create the apps and export the OAuth URLs: + python scripts/provision_slack_bots.py --export-urls + + This prints one URL per bot and writes them to oauth_urls.txt. + Send that file (or the printed URLs) to the workspace admin. + + Step 2 — the admin opens each URL in a browser, clicks Allow, and lands on + httpbin.org showing JSON like: + {"args": {"code": "abc123", "state": "su"}, ...} + They copy each agent_id and code into a plain text file (one per line): + su:abc123 + wiseman:def456 + lotz:ghi789 + and send the file back to you.
+ + Step 3 — on your server, exchange the codes for tokens: + python scripts/provision_slack_bots.py --exchange-codes codes.txt +""" + +import argparse +import json +import sys +import threading +import time +import urllib.parse +from http.server import BaseHTTPRequestHandler, HTTPServer +from pathlib import Path + +import httpx +from dotenv import dotenv_values, set_key +from rich.console import Console +from rich.table import Table + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +SLACK_API = "https://slack.com/api" +CALLBACK_PATH = "/oauth/callback" +STATE_FILE = Path(".provision_state.json") + +# Used by --export-urls / --exchange-codes: Slack redirects the admin here after +# approval and httpbin echoes the code + state as JSON so the admin can copy them. +HTTPBIN_REDIRECT = "https://httpbin.org/get" + +# All scopes the bots actually use — derived from AgentSlackClient + routers/podcast +BOT_SCOPES = [ + "channels:history", # conversations.history / conversations.replies + "channels:join", # conversations.join + "channels:manage", # conversations.create + "channels:read", # conversations.list + "chat:write", # chat.postMessage + "groups:history", # threads in private channels + "groups:read", # conversations.list private + "im:history", # poll_dm_messages + "im:write", # conversations.open (DMs) + "users:read", # users.info + "users:read.email", # users.lookupByEmail +] + +console = Console() + + +# --------------------------------------------------------------------------- +# Parse PILOT_LABS from source without importing the module +# (avoids pulling in SQLAlchemy and other heavy dependencies) +# --------------------------------------------------------------------------- + +def load_pilot_labs() -> list[dict]: + import ast + src = Path(__file__).parent.parent / "src" / "agent" / "simulation.py" + tree = ast.parse(src.read_text()) + 
for node in ast.walk(tree): + if ( + isinstance(node, ast.Assign) + and len(node.targets) == 1 + and isinstance(node.targets[0], ast.Name) + and node.targets[0].id == "PILOT_LABS" + ): + return ast.literal_eval(node.value) + raise RuntimeError("PILOT_LABS not found in src/agent/simulation.py") + + +# --------------------------------------------------------------------------- +# Slack API helpers +# --------------------------------------------------------------------------- + +def lookup_team_id(existing_env: dict) -> str | None: + """Call auth.test on the first valid bot token to get the workspace team_id.""" + for key, val in existing_env.items(): + if ( + key.upper().startswith("SLACK_BOT_TOKEN_") + and val + and val.startswith("xoxb-") + and not val.startswith("xoxb-placeholder") + ): + resp = httpx.post( + f"{SLACK_API}/auth.test", + headers={"Authorization": f"Bearer {val}"}, + timeout=10, + ) + data = resp.json() + if data.get("ok"): + return data.get("team_id") + return None + + +def rotate_config_token(refresh_token: str) -> tuple[str, str]: + """Rotate the app-config token. Returns (new_access_token, new_refresh_token).""" + resp = httpx.post( + f"{SLACK_API}/tooling.tokens.rotate", + data={"refresh_token": refresh_token}, + timeout=15, + ) + data = resp.json() + if not data.get("ok"): + raise RuntimeError(f"tooling.tokens.rotate failed: {data.get('error')}") + return data["token"], data["refresh_token"] + + +def create_app( + config_token: str, + agent_id: str, + bot_name: str, + pi_name: str, + redirect_uri: str, + max_rate_limit_retries: int = 5, +) -> dict: + """ + Create one Slack app via the Manifest API. + Returns a dict with app_id, client_id, client_secret, oauth_url. + Retries on rate-limit responses only; all other errors raise immediately. 
+ """ + manifest = { + "display_information": { + "name": bot_name, + "description": f"LabBot agent for {pi_name}", + }, + "features": { + "bot_user": { + "display_name": bot_name, + "always_online": False, + } + }, + "oauth_config": { + "redirect_urls": [redirect_uri], + "scopes": {"bot": BOT_SCOPES}, + }, + "settings": { + "org_deploy_enabled": False, + "socket_mode_enabled": False, + "token_rotation_enabled": False, + }, + } + for attempt in range(max_rate_limit_retries): + resp = httpx.post( + f"{SLACK_API}/apps.manifest.create", + headers={"Authorization": f"Bearer {config_token}"}, + json={"manifest": manifest}, + timeout=20, + ) + data = resp.json() + if data.get("ok"): + creds = data["credentials"] + return { + "agent_id": agent_id, + "bot_name": bot_name, + "pi_name": pi_name, + "app_id": data["app_id"], + "client_id": creds["client_id"], + "client_secret": creds["client_secret"], + "oauth_url": data["oauth_authorize_url"], + } + if data.get("error") == "ratelimited": + wait = int(data.get("retry_after", 0) or resp.headers.get("Retry-After", 60)) + console.print(f" [yellow]rate limited — waiting {wait}s before retrying {bot_name}…[/yellow]") + time.sleep(wait) + else: + detail = data.get("errors") or data.get("error", "unknown") + raise RuntimeError(f"apps.manifest.create failed: {detail}") + raise RuntimeError(f"apps.manifest.create: still rate-limited after {max_rate_limit_retries} retries") + + +def exchange_code( + client_id: str, + client_secret: str, + code: str, + redirect_uri: str, +) -> str: + """Exchange a temporary OAuth code for a bot token. Returns xoxb-... 
string.""" + resp = httpx.post( + f"{SLACK_API}/oauth.v2.access", + data={ + "client_id": client_id, + "client_secret": client_secret, + "code": code, + "redirect_uri": redirect_uri, + }, + timeout=15, + ) + data = resp.json() + if not data.get("ok"): + raise RuntimeError(f"oauth.v2.access failed: {data.get('error')}") + token = data.get("access_token", "") + if not token.startswith("xoxb-"): + raise RuntimeError(f"Unexpected token format: {token[:20]}...") + return token + + +# --------------------------------------------------------------------------- +# OAuth callback HTTP server +# --------------------------------------------------------------------------- + +class _CallbackHandler(BaseHTTPRequestHandler): + """ + Handles GET /oauth/callback?code=...&state= + Exchanges the code for a token and writes it to .env. + """ + + # Shared state injected before server starts + pending: dict = {} # agent_id -> {bot_name, client_id, client_secret} + received: dict = {} # agent_id -> xoxb-token + env_file: str = ".env" + redirect_uri: str = "" + + def do_GET(self): + parsed = urllib.parse.urlparse(self.path) + if parsed.path != CALLBACK_PATH: + self._html(404, "

404 Not found

") + return + + params = dict(urllib.parse.parse_qsl(parsed.query)) + code = params.get("code") + error = params.get("error") + agent_id = params.get("state") + + if error: + self._html(400, f"

Slack returned an error: {error}

") + return + + if not code or not agent_id: + self._html(400, "

Missing code or state parameter

") + return + + info = self.pending.get(agent_id) + if not info: + self._html(400, f"

Unknown agent_id in state: {agent_id!r}

") + return + + if agent_id in self.received: + self._html(200, f"

{info['bot_name']} already installed — duplicate callback ignored.

") + return + + try: + token = exchange_code( + info["client_id"], info["client_secret"], code, self.redirect_uri + ) + except Exception as exc: + console.print(f"[red]Token exchange failed for {agent_id}: {exc}[/red]") + self._html(500, f"

Token exchange failed: {exc}

") + return + + env_key = f"SLACK_BOT_TOKEN_{agent_id.upper()}" + set_key(self.env_file, env_key, token, quote_mode="never") + self.received[agent_id] = token + + remaining = len(self.pending) - len(self.received) + console.print(f"[green]✓[/green] [bold]{info['bot_name']}[/bold] → {env_key}") + self._html(200, f""" +

✅ {info['bot_name']} installed!

+

Token written to .env as {env_key}

+

{remaining} bot(s) remaining. You may close this tab.

+ """) + + def _html(self, code: int, body: str): + content = ( + "" + + body + + "" + ).encode() + self.send_response(code) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.send_header("Content-Length", str(len(content))) + self.end_headers() + self.wfile.write(content) + + def log_message(self, *_args): + pass # suppress default access log noise + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def _run_export_urls(args: argparse.Namespace) -> None: + """Create Slack apps and export OAuth URLs for a remote admin to approve.""" + pilot_labs = load_pilot_labs() + existing_env = dotenv_values(args.env_file) + + tokenized = { + k[len("SLACK_BOT_TOKEN_"):].lower() + for k, v in existing_env.items() + if k.upper().startswith("SLACK_BOT_TOKEN_") + and v and not v.startswith("xoxb-placeholder") + } + missing = [lab for lab in pilot_labs if lab["id"] not in tokenized] + + if not missing: + console.print("[green]All bots already have tokens. 
Nothing to do.[/green]") + return + + config_token = existing_env.get("SLACK_CONFIG_TOKEN", "").strip() + refresh_token = existing_env.get("SLACK_CONFIG_REFRESH_TOKEN", "").strip() + if not config_token: + console.print("[bold red]SLACK_CONFIG_TOKEN is not set in .env[/bold red]") + sys.exit(1) + + if refresh_token: + console.print("Rotating config token...") + try: + config_token, new_refresh = rotate_config_token(refresh_token) + set_key(args.env_file, "SLACK_CONFIG_TOKEN", config_token, quote_mode="never") + set_key(args.env_file, "SLACK_CONFIG_REFRESH_TOKEN", new_refresh, quote_mode="never") + console.print("[green]Config token rotated.[/green]") + except Exception as exc: + console.print(f"[yellow]Token rotation failed ({exc}); using existing token.[/yellow]") + + team_id = args.team_id + if not team_id: + team_id = lookup_team_id(existing_env) + + console.print(f"\nCreating {len(missing)} Slack app(s)...\n") + created: list[dict] = [] + failed = 0 + for i, lab in enumerate(missing): + try: + app = create_app(config_token, lab["id"], lab["name"], lab["pi"], HTTPBIN_REDIRECT) + created.append(app) + console.print(f" [green]{i+1:2d}.[/green] [bold]{app['bot_name']}[/bold] (app {app['app_id']})") + except Exception as exc: + console.print(f" [red]failed[/red] {lab['name']}: {exc}") + failed += 1 + if i < len(missing) - 1: + time.sleep(12) + + if not created: + console.print("[red]No apps created. 
Exiting.[/red]") + sys.exit(1) + + STATE_FILE.write_text(json.dumps(created, indent=2)) + console.print(f"\n[green]Credentials saved to {STATE_FILE}[/green]") + + def _oauth_url(app: dict) -> str: + extra = {"state": app["agent_id"], "redirect_uri": HTTPBIN_REDIRECT} + if team_id: + extra["team"] = team_id + return app["oauth_url"] + "&" + urllib.parse.urlencode(extra) + + lines = [] + console.print("\n[bold yellow]Send these URLs to the workspace admin.[/bold yellow]") + console.print("After clicking Allow, they will land on httpbin.org showing JSON like:") + console.print(' {"args": {"code": "abc123", "state": "su"}, ...}') + console.print("Ask them to send back a file with one [bold]agent_id:code[/bold] per line.\n") + + for app in created: + url = _oauth_url(app) + lines.append(f"{app['bot_name']} ({app['agent_id']}):\n {url}\n") + console.print(f"[cyan]{app['bot_name']}[/cyan] ({app['agent_id']}):") + console.print(f" {url}\n") + + out_file = Path("oauth_urls.txt") + out_file.write_text("\n".join(lines)) + console.print(f"[green]URLs also saved to {out_file}[/green]") + if failed: + console.print(f"[yellow]{failed} app(s) failed — re-run to retry.[/yellow]") + + +def _parse_codes_file(text: str) -> list[tuple[str, str]]: + """Parse a codes file into (agent_id, code) pairs. + + Supports two formats (auto-detected): + + 1. Simple — one agent_id:code per line: + cline:10935961... + su:abc123... + + 2. 
httpbin JSON — the raw JSON response from httpbin.org/get, or multiple + responses separated by lines containing only '---': + {"args": {"code": "10935961...", "state": "cline"}, ...} + --- + {"args": {"code": "abc123...", "state": "su"}, ...} + """ + text = text.strip() + pairs: list[tuple[str, str]] = [] + + # Split on --- separators to handle multiple httpbin blobs + blocks = [b.strip() for b in text.split("---") if b.strip()] + + for block in blocks: + # Try JSON parse first (httpbin format) + if block.startswith("{"): + try: + data = json.loads(block) + args = data.get("args", {}) + code = args.get("code", "").strip() + agent_id = args.get("state", "").strip() + if code and agent_id: + pairs.append((agent_id, code)) + continue + except json.JSONDecodeError: + pass + + # Fall back to line-by-line parsing (URL or agent_id:code) + for line in block.splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + # httpbin URL: https://httpbin.org/get?code=...&state=... 
+ if line.startswith("http"): + parsed = urllib.parse.urlparse(line) + params = dict(urllib.parse.parse_qsl(parsed.query)) + code = params.get("code", "").strip() + agent_id = params.get("state", "").strip() + if code and agent_id: + pairs.append((agent_id, code)) + else: + console.print(f"[yellow]Skipping URL with missing code/state: {line!r}[/yellow]") + continue + # Simple agent_id:code + if ":" not in line: + console.print(f"[yellow]Skipping malformed line (expected agent_id:code): {line!r}[/yellow]") + continue + agent_id, code = line.split(":", 1) + pairs.append((agent_id.strip(), code.strip())) + + return pairs + + +def _run_exchange_codes(codes_file: str, env_file: str) -> None: + """Read agent_id:code pairs from a file and exchange each for an xoxb- token.""" + codes_path = Path(codes_file) + if not codes_path.exists(): + console.print(f"[red]Codes file not found: {codes_file}[/red]") + sys.exit(1) + if not STATE_FILE.exists(): + console.print(f"[red]{STATE_FILE} not found — run --export-urls first.[/red]") + sys.exit(1) + + state: list[dict] = json.loads(STATE_FILE.read_text()) + creds_by_id = {app["agent_id"]: app for app in state} + + pairs = _parse_codes_file(codes_path.read_text()) + + if not pairs: + console.print("[red]No valid codes found in file.[/red]") + sys.exit(1) + + console.print(f"\nExchanging {len(pairs)} code(s)...\n") + saved = 0 + for agent_id, code in pairs: + + app = creds_by_id.get(agent_id) + if not app: + console.print(f"[yellow]No credentials found for agent_id {agent_id!r} — skipping.[/yellow]") + continue + + try: + token = exchange_code(app["client_id"], app["client_secret"], code, HTTPBIN_REDIRECT) + except Exception as exc: + console.print(f"[red]Failed to exchange code for {agent_id}: {exc}[/red]") + continue + + env_key = f"SLACK_BOT_TOKEN_{agent_id.upper()}" + set_key(env_file, env_key, token, quote_mode="never") + console.print(f"[green]✓[/green] [bold]{app['bot_name']}[/bold] → {env_key}") + saved += 1 + + 
console.print(f"\n[bold]{saved}/{len(pairs)} token(s) saved to {env_file}[/bold]") + if saved == len(pairs): + STATE_FILE.unlink(missing_ok=True) + console.print("[green]All done! Restart the agent container to pick up the new tokens.[/green]") + console.print(" docker rm -f agent-run") + console.print(" docker compose up -d --build app worker") + console.print(" docker compose --profile agent run -d --name agent-run agent python -m src.agent.main --budget 0") + else: + console.print(f"[yellow]Some exchanges failed. Fix and re-run --exchange-codes with the remaining codes.[/yellow]") + console.print(f" Credentials still in {STATE_FILE} — no need to re-run --export-urls.") + + +def main(): + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--port", type=int, default=8888, + help="Local port for the OAuth callback server (default: 8888)", + ) + parser.add_argument( + "--env-file", default=".env", + help="Path to the .env file that will receive the new tokens (default: .env)", + ) + parser.add_argument( + "--dry-run", action="store_true", + help="Show which bots need tokens; make no API calls", + ) + parser.add_argument( + "--skip-create", action="store_true", + help=f"Skip app creation and reuse credentials from {STATE_FILE}", + ) + parser.add_argument( + "--team-id", + help="Slack workspace team ID (e.g. T012AB3CD) to pin OAuth URLs to the right workspace. " + "Auto-detected from an existing bot token if not provided.", + ) + parser.add_argument( + "--export-urls", action="store_true", + help=( + "Create Slack apps and export OAuth URLs for a remote admin to approve. " + f"Uses {HTTPBIN_REDIRECT} as the redirect URI so the admin sees the code " + "on-screen. URLs are printed and saved to oauth_urls.txt. " + "Run --exchange-codes after the admin returns the codes." 
+ ), + ) + parser.add_argument( + "--exchange-codes", + metavar="CODES_FILE", + help=( + "Exchange OAuth codes provided by a remote admin for bot tokens. " + "CODES_FILE must contain one 'agent_id:code' entry per line. " + f"Requires {STATE_FILE} from a previous --export-urls run." + ), + ) + parser.add_argument( + "--exchange-urls", + metavar="URLS_FILE", + help=( + "Exchange OAuth codes extracted from httpbin redirect URLs. " + "URLS_FILE must contain one full httpbin URL per line, e.g.: " + "https://httpbin.org/get?code=...&state=su. " + f"Requires {STATE_FILE} from a previous --export-urls run." + ), + ) + args = parser.parse_args() + + redirect_uri = f"http://localhost:{args.port}{CALLBACK_PATH}" + + # ----------------------------------------------------------------------- + # --export-urls: create apps and write OAuth URLs for a remote admin + # ----------------------------------------------------------------------- + if args.export_urls: + _run_export_urls(args) + return + + # ----------------------------------------------------------------------- + # --exchange-codes: exchange codes file returned by the admin for tokens + # ----------------------------------------------------------------------- + if args.exchange_codes: + _run_exchange_codes(args.exchange_codes, args.env_file) + return + + if args.exchange_urls: + _run_exchange_codes(args.exchange_urls, args.env_file) + return + + # ----------------------------------------------------------------------- + # 1. 
Determine which bots are missing tokens + # ----------------------------------------------------------------------- + pilot_labs = load_pilot_labs() + existing_env = dotenv_values(args.env_file) + + team_id = args.team_id + if not team_id and not args.dry_run: + team_id = lookup_team_id(existing_env) + if team_id: + console.print(f"Detected workspace team ID: [cyan]{team_id}[/cyan]") + else: + console.print("[yellow]Could not detect team ID — OAuth links may open the wrong workspace.[/yellow]") + console.print(" Pass --team-id T... to fix this.") + + tokenized = { + k[len("SLACK_BOT_TOKEN_"):].lower() + for k, v in existing_env.items() + if k.upper().startswith("SLACK_BOT_TOKEN_") + and v + and not v.startswith("xoxb-placeholder") + } + + missing = [lab for lab in pilot_labs if lab["id"] not in tokenized] + + if not missing: + console.print("[green]All bots already have tokens. Nothing to do.[/green]") + return + + t = Table(title=f"{len(missing)} bot(s) need Slack tokens", show_lines=True) + t.add_column("agent_id", style="cyan") + t.add_column("Bot name") + t.add_column("PI") + for lab in missing: + t.add_row(lab["id"], lab["name"], lab["pi"]) + console.print(t) + + if args.dry_run: + console.print("[yellow]--dry-run active: no API calls made.[/yellow]") + return + + # ----------------------------------------------------------------------- + # 2. Obtain / rotate config token + # ----------------------------------------------------------------------- + config_token = existing_env.get("SLACK_CONFIG_TOKEN", "").strip() + refresh_token = existing_env.get("SLACK_CONFIG_REFRESH_TOKEN", "").strip() + + if not config_token: + console.print("\n[bold red]SLACK_CONFIG_TOKEN is not set in .env[/bold red]") + console.print( + " 1. Open https://api.slack.com/apps in a browser\n" + " 2. Click 'Your App Configuration Tokens'\n" + " 3. Click 'Generate Token' for your workspace\n" + " 4. Copy the token (xoxe-...) 
and refresh token into .env:\n" + " SLACK_CONFIG_TOKEN=xoxe-...\n" + " SLACK_CONFIG_REFRESH_TOKEN=xoxe-...\n" + ) + sys.exit(1) + + if refresh_token: + console.print("Rotating config token...") + try: + config_token, new_refresh = rotate_config_token(refresh_token) + set_key(args.env_file, "SLACK_CONFIG_TOKEN", config_token, quote_mode="never") + set_key(args.env_file, "SLACK_CONFIG_REFRESH_TOKEN", new_refresh, quote_mode="never") + console.print("[green]Config token rotated and saved.[/green]") + except Exception as exc: + console.print(f"[yellow]Token rotation failed ({exc}); using existing token.[/yellow]") + + # ----------------------------------------------------------------------- + # 3. Start OAuth callback server (before app creation so URLs work immediately) + # ----------------------------------------------------------------------- + _CallbackHandler.pending = {} + _CallbackHandler.received = {} + _CallbackHandler.env_file = args.env_file + _CallbackHandler.redirect_uri = redirect_uri + + server = HTTPServer(("localhost", args.port), _CallbackHandler) + threading.Thread(target=server.serve_forever, daemon=True).start() + console.print(f"\n[bold]OAuth callback server running on http://localhost:{args.port}[/bold]") + console.print( + "\n[bold yellow]Open each URL in a browser while signed into the workspace.[/bold yellow]\n" + "Each approval redirects back here and saves the token to .env automatically.\n" + ) + + # ----------------------------------------------------------------------- + # 4. 
Create apps (or load previous run's state) and print URLs as they appear + # ----------------------------------------------------------------------- + def _oauth_url(app: dict) -> str: + extra = {"state": app["agent_id"], "redirect_uri": redirect_uri} + if team_id: + extra["team"] = team_id + return app["oauth_url"] + "&" + urllib.parse.urlencode(extra) + + created: list[dict] = [] + if args.skip_create: + if not STATE_FILE.exists(): + console.print(f"[red]--skip-create: {STATE_FILE} not found. Run without that flag first.[/red]") + server.shutdown() + sys.exit(1) + all_state: list[dict] = json.loads(STATE_FILE.read_text()) + missing_ids = {lab["id"] for lab in missing} + created = [a for a in all_state if a["agent_id"] in missing_ids] + console.print(f"Loaded {len(created)} app credential(s) from {STATE_FILE}\n") + for i, app in enumerate(created, 1): + _CallbackHandler.pending[app["agent_id"]] = { + "bot_name": app["bot_name"], + "client_id": app["client_id"], + "client_secret": app["client_secret"], + } + console.print(f" [cyan]{i:2d}.[/cyan] [bold]{app['bot_name']}[/bold] ({app['pi_name']})") + console.print(f" {_oauth_url(app)}\n") + else: + failed_count = 0 + for i, lab in enumerate(missing): + try: + app = create_app(config_token, lab["id"], lab["name"], lab["pi"], redirect_uri) + created.append(app) + _CallbackHandler.pending[app["agent_id"]] = { + "bot_name": app["bot_name"], + "client_id": app["client_id"], + "client_secret": app["client_secret"], + } + console.print(f" [green]{i+1:2d}.[/green] [bold]{app['bot_name']}[/bold] (app {app['app_id']})") + console.print(f" {_oauth_url(app)}\n") + except Exception as exc: + console.print(f" [red]failed[/red] {lab['name']}: {exc}") + failed_count += 1 + # Slack's Manifest API allows ~10 req/min; 12s between calls stays well under + if i < len(missing) - 1: + time.sleep(12) + + if created: + STATE_FILE.write_text(json.dumps(created, indent=2)) + if failed_count: + console.print(f"[yellow]{failed_count} app(s) 
failed to create — fix errors and re-run.[/yellow]") + + if not created: + console.print("[red]No apps available for OAuth. Exiting.[/red]") + server.shutdown() + sys.exit(1) + + # ----------------------------------------------------------------------- + # 5. Wait for all OAuth callbacks + # ----------------------------------------------------------------------- + console.print(f"Waiting for {len(created)} installation(s)… (Ctrl-C to stop early)\n") + try: + while len(_CallbackHandler.received) < len(created): + time.sleep(0.5) + except KeyboardInterrupt: + console.print("\n[yellow]Interrupted.[/yellow]") + finally: + server.shutdown() + + done = len(_CallbackHandler.received) + total = len(created) + console.print(f"\n[bold]Finished: {done}/{total} token(s) saved to {args.env_file}[/bold]") + + if done < total: + outstanding = [a["bot_name"] for a in created if a["agent_id"] not in _CallbackHandler.received] + console.print(f"[yellow]Still missing: {', '.join(outstanding)}[/yellow]") + console.print(f"Re-run with [bold]--skip-create[/bold] to retry without recreating the apps.") + else: + if STATE_FILE.exists(): + STATE_FILE.unlink() + console.print(f"[green]All done! Restart the agent container to pick up the new tokens.[/green]") + console.print(" docker rm -f agent-run") + console.print(" docker compose up -d --build app worker") + console.print(" docker compose --profile agent run -d --name agent-run agent python -m src.agent.main --budget 0") + + +if __name__ == "__main__": + main() diff --git a/scripts/test_podcast_su.py b/scripts/test_podcast_su.py new file mode 100644 index 0000000..6795f27 --- /dev/null +++ b/scripts/test_podcast_su.py @@ -0,0 +1,143 @@ +"""One-shot test: run the podcast pipeline for agent 'su' only. 
+ +Outputs: + .labbot-tests/su-summary-.txt — generated text summary + .labbot-tests/su-audio-.mp3 — TTS audio (if MISTRAL_API_KEY is set) + +Usage: + DATABASE_URL=postgresql+asyncpg://copi:copi@localhost:5432/copi \ + python scripts/test_podcast_su.py +""" + +import asyncio +import logging +import os +import shutil +from datetime import date +from pathlib import Path + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s: %(message)s", +) +logger = logging.getLogger(__name__) + +OUTPUT_DIR = Path(".labbot-tests") +AUDIO_DIR = Path("data/podcast_audio") + + +async def run(): + from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine + from sqlalchemy.orm import sessionmaker + + from src.config import get_settings + from src.podcast.pipeline import ( + _generate_summary, + _load_podcast_preferences, + _load_public_profile, + _parse_profile_markdown, + _select_article, + _try_fetch_full_text, + ) + from src.podcast.tts_utils import get_audio_duration_seconds + from src.podcast.pubmed_search import build_queries, fetch_candidates + from src.podcast.state import get_delivered_pmids, record_delivery + + settings = get_settings() + agent_id = "su" + today = date.today() + OUTPUT_DIR.mkdir(exist_ok=True) + + logger.info("=== LabBot Podcast test run for agent: %s ===", agent_id) + + # 1. Load profiles + profile_text = _load_public_profile(agent_id) + if not profile_text: + logger.error("No public profile found for agent: %s", agent_id) + return + logger.info("Loaded profile (%d chars)", len(profile_text)) + + preferences_text = await _load_podcast_preferences(agent_id) + if preferences_text: + logger.info("Loaded podcast preferences (%d chars)", len(preferences_text)) + else: + logger.info("No podcast preferences found for agent: %s", agent_id) + + # 2. 
Build queries and fetch candidates + profile_dict = _parse_profile_markdown(profile_text) + queries = build_queries(profile_dict) + logger.info("Search queries: %s", queries) + + already_delivered = get_delivered_pmids(agent_id) + logger.info("Already delivered PMIDs: %s", already_delivered) + + candidates = await fetch_candidates( + queries, + already_delivered=already_delivered, + days=settings.podcast_search_window_days, + max_total=settings.podcast_max_candidates, + ) + logger.info("Fetched %d candidates", len(candidates)) + if not candidates: + logger.error("No candidate articles found — aborting") + return + + # 3. LLM article selection + selected, justification = await _select_article(profile_text, candidates, agent_id, preferences_text) + if selected is None: + logger.error("No article selected — aborting") + return + pmid = selected.get("pmid", "") + logger.info("Selected PMID: %s", pmid) + logger.info("Justification: %s", justification) + + # 4. Fetch full text + full_text = await _try_fetch_full_text(pmid) + logger.info("Full text fetched: %s", bool(full_text)) + + # 5. Generate text summary + summary = await _generate_summary(profile_text, selected, full_text, agent_id, preferences_text) + if not summary: + logger.error("Summary generation failed — aborting") + return + + summary_path = OUTPUT_DIR / f"su-summary-{today.isoformat()}.txt" + summary_path.write_text(summary, encoding="utf-8") + logger.info("Summary written to %s", summary_path) + print("\n" + "=" * 60) + print("TEXT SUMMARY") + print("=" * 60) + print(summary) + print("=" * 60 + "\n") + + # 6. 
Generate audio — dispatch to backend configured by PODCAST_TTS_BACKEND + if settings.podcast_tts_backend == "local": + from src.podcast.local_tts import generate_audio + logger.info("TTS backend: local vLLM-Omni (%s:%s)", settings.local_tts_host, settings.local_tts_port) + elif settings.podcast_tts_backend == "openai": + from src.podcast.openai_tts import generate_audio + logger.info("TTS backend: OpenAI TTS") + else: + from src.podcast.mistral_tts import generate_audio + logger.info("TTS backend: Mistral AI (%s)", settings.mistral_tts_model) + + audio_src = AUDIO_DIR / agent_id / f"{today.isoformat()}.mp3" + audio_ok = await generate_audio(summary, agent_id, audio_src) + + if audio_ok: + audio_dest = OUTPUT_DIR / f"su-audio-{today.isoformat()}.mp3" + shutil.copy2(audio_src, audio_dest) + duration = get_audio_duration_seconds(audio_src) + logger.info("Audio saved to %s (duration: %ss)", audio_dest, duration) + else: + logger.warning("Audio generation failed (backend: %s)", settings.podcast_tts_backend) + + logger.info("=== Test run complete ===") + logger.info(" PMID: %s", pmid) + logger.info(" Summary: %s", summary_path) + if audio_ok: + logger.info(" Audio: %s", audio_dest) + + +if __name__ == "__main__": + asyncio.run(run()) diff --git a/specs/admin-dashboard.md b/specs/admin-dashboard.md index 2d9cd8f..7a077a7 100644 --- a/specs/admin-dashboard.md +++ b/specs/admin-dashboard.md @@ -140,7 +140,74 @@ Analytics on agent-to-agent thread conversations and outcomes. **Export:** HTML and plain text export options for proposal review. -### 7. LLM Call Logs (`/admin/llm-calls`) +### 7. Matchmaker (`/admin/matchmaker`) + +Admin tool for generating collaboration proposals between two PIs on demand, without running an agent simulation. See `labbot-matchmaker.md` for the full specification. 
+ +**Generate form (top of page):** +- Two dropdowns listing all users with a complete `ResearcherProfile`, sorted by name +- Client-side enforcement: same user cannot be selected in both dropdowns; Generate button disabled until both are selected +- On submit: POSTs to `/admin/matchmaker/generate`, shows spinner, redirects to detail view on success + +**Proposals table:** +- Confidence badge (High = green, Moderate = yellow, Speculative = gray) +- PI A / PI B names +- Proposal title (truncated) +- Generated timestamp +- View / Delete actions + +**Filters:** +- PI multi-select (matches either side) +- Confidence filter + +**Row click** → proposal detail page (`/admin/matchmaker/{id}`) + +**Proposal detail (`/admin/matchmaker/{id}`):** +- Header: PI A × PI B, confidence badge, generated timestamp, token counts +- Full proposal rendered as markdown +- Delete button + +### 8. PI Proposal Evaluations (`/admin/evaluations`) + +Full visibility into all NIH-style PI evaluations submitted through the `/proposals` tab. This is the only place in the system where proposal origin (agent vs. matchmaker) is revealed — the PI-facing evaluation flow deliberately obscures it. + +See `proposal-review-pi.md` for the complete specification of the evaluation form and data model. This section covers the admin view only. + +**Summary cards:** +- Total evaluations submitted +- Evaluations this month +- Proposals with ≥1 evaluation vs. total proposals in the system +- Mean overall impact score (all time, shown as X.X / 9) + +**Evaluations table** — one row per `PiProposalEvaluation`: + +| Column | Notes | +|---|---| +| Evaluator | PI's name | +| Proposal title | From the linked `ThreadDecision` or `MatchmakerProposal` | +| Origin | `Agent` (blue) or `Matchmaker` (purple) badge | +| Collaborator | The other PI named in the proposal | +| Sig. / Inn. / App. / Inv. / Env. 
| Individual criterion scores 1–9 | +| Impact | `score_overall_impact`, bold; color-coded green (1–3), yellow (4–6), red (7–9) | +| Submitted | `evaluated_at` | +| Updated | `updated_at` if the PI amended their evaluation | +| Actions | "View" → expanded detail | + +**Row detail (inline expand or modal):** +- All six scores with their associated comments +- Full proposal body rendered as markdown +- Evaluator metadata (name, institution, submission/amendment timestamps) + +**Filters:** +- Evaluator (multi-select user dropdown) +- Origin: All / Agent only / Matchmaker only +- Overall Impact range (numeric min/max, 1–9) +- Date range (evaluated_at from/to) + +**Export:** +`GET /admin/evaluations/export.json` — downloads a JSON file of all rows matching the active filter state. Filters are passed as query params so the export always reflects what is on screen. See `proposal-review-pi.md §Admin Page` for the full JSON schema. + +### 9. LLM Call Logs (`/admin/llm-calls`) Debugging view for all LLM API calls. @@ -153,7 +220,7 @@ Debugging view for all LLM API calls. - Latency (ms) - System prompt and response (expandable) -### 8. Access Requests (`/admin/access-requests`) +### 10. Access Requests (`/admin/access-requests`) Pre-release access gate management. @@ -171,7 +238,7 @@ Pre-release access gate management. - Add ORCID + note form - Remove ORCID button -### 9. Waitlist (`/admin/waitlist`) +### 11. Waitlist (`/admin/waitlist`) Lead-capture signups from the public landing page. @@ -183,7 +250,7 @@ Lead-capture signups from the public landing page. No outbound email is sent automatically — the admin uses the export to reach out manually, then marks rows contacted. -### 10. User Impersonation +### 12. User Impersonation Admins can assume the identity of any user to see the app as they see it. @@ -215,6 +282,12 @@ Admins can assume the identity of any user to see the app as they see it. 
| `POST /admin/agents/{id}/approve` | Approve pending agent | | `GET /admin/discussions` | Thread discussions and outcomes | | `GET /admin/discussions/export` | Export discussions (HTML/text) | +| `GET /admin/matchmaker` | Matchmaker tab with generate form and proposals table | +| `POST /admin/matchmaker/generate` | Run LLM pipeline and store result | +| `GET /admin/matchmaker/{id}` | Proposal detail view | +| `POST /admin/matchmaker/{id}/delete` | Delete a proposal | +| `GET /admin/evaluations` | PI proposal evaluations overview | +| `GET /admin/evaluations/export.json` | JSON export of evaluations (filter params as query string) | | `GET /admin/access-requests` | Pending access requests + allowlist management | | `POST /admin/access-requests/{user_id}/approve` | Approve a pending user | | `POST /admin/access-requests/{user_id}/deny` | Deny a pending user | diff --git a/specs/cohort-system.md b/specs/cohort-system.md new file mode 100644 index 0000000..0d65b99 --- /dev/null +++ b/specs/cohort-system.md @@ -0,0 +1,438 @@ +# Cohort System Specification + +## Overview + +A cohort is a named group of agents whose members are permitted to interact with each other during simulation. The purpose is purely practical: prevent agents from spending LLM turns scanning, activating threads with, or tagging agents they will never productively engage. Cohorts are orthogonal to Slack channels — channel subscriptions remain unchanged; cohort membership only gates whether one agent will *act on* another agent's activity. + +Agents may belong to any number of cohorts. Cohort assignments are admin-managed and can change while a simulation is running. Interaction limits (thread count, proposal caps, budgets) remain per-agent and are shared across all cohorts an agent belongs to. 
+ +--- + +## Goals + +- Skip Phase 2 scan evaluation of posts from non-cohort agents (save Sonnet calls) +- Skip Phase 3 thread activation from non-cohort agents (save CPU + state bloat) +- Skip Phase 5 tagging or replying to non-cohort agents (save Opus calls) +- Run N turns concurrently via a global semaphore for predictable API cost at any agent list size +- Ensure fair turn distribution across all agents via min-heap selection +- Allow membership to change mid-run without requiring a restart + +--- + +## Data Model + +### New Table: `cohorts` + +```sql +CREATE TABLE cohorts ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + name TEXT NOT NULL UNIQUE, + description TEXT, + created_by UUID REFERENCES users(id) ON DELETE SET NULL, + created_at TIMESTAMP WITH TIME ZONE DEFAULT now() +); +``` + +- `name`: short slug-style identifier (e.g. `"pilot-wave-1"`, `"structural-cohort"`). Unique, immutable after creation. +- `description`: optional free-text note for admin reference. +- `created_by`: FK to the admin user who created it; nullable (SET NULL on user delete). + +### New Table: `cohort_memberships` + +```sql +CREATE TABLE cohort_memberships ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + cohort_id UUID NOT NULL REFERENCES cohorts(id) ON DELETE CASCADE, + agent_id TEXT NOT NULL, + added_by UUID REFERENCES users(id) ON DELETE SET NULL, + added_at TIMESTAMP WITH TIME ZONE DEFAULT now(), + UNIQUE (cohort_id, agent_id) +); +``` + +- `agent_id`: matches `AgentRegistry.agent_id` (string, e.g. `"su"`, `"wiseman"`). No FK enforced — agent records may not exist at table creation time; the application validates at join time. +- Composite unique constraint prevents duplicate membership. +- Cascade delete: removing a cohort removes all its memberships. + +### Migration + +File: `alembic/versions/0023_add_cohorts.py` + +```python +def upgrade(): + op.create_table("cohorts", ...) + op.create_table("cohort_memberships", ...) 
+ op.create_index("ix_cohort_memberships_cohort_id", "cohort_memberships", ["cohort_id"]) + op.create_index("ix_cohort_memberships_agent_id", "cohort_memberships", ["agent_id"]) + +def downgrade(): + op.drop_table("cohort_memberships") + op.drop_table("cohorts") +``` + +### SQLAlchemy Models + +`src/models/cohort.py`: + +```python +class Cohort(Base): + __tablename__ = "cohorts" + id: Mapped[UUID] = mapped_column(primary_key=True, default=uuid4) + name: Mapped[str] = mapped_column(unique=True) + description: Mapped[str | None] + created_by: Mapped[UUID | None] = mapped_column(ForeignKey("users.id", ondelete="SET NULL")) + created_at: Mapped[datetime] = mapped_column(default=func.now()) + memberships: Mapped[list["CohortMembership"]] = relationship(back_populates="cohort", cascade="all, delete-orphan") + +class CohortMembership(Base): + __tablename__ = "cohort_memberships" + id: Mapped[UUID] = mapped_column(primary_key=True, default=uuid4) + cohort_id: Mapped[UUID] = mapped_column(ForeignKey("cohorts.id", ondelete="CASCADE")) + agent_id: Mapped[str] + added_by: Mapped[UUID | None] = mapped_column(ForeignKey("users.id", ondelete="SET NULL")) + added_at: Mapped[datetime] = mapped_column(default=func.now()) + cohort: Mapped["Cohort"] = relationship(back_populates="memberships") +``` + +Export from `src/models/__init__.py` alongside existing models. 
+ +--- + +## Agent Changes + +### `src/agent/agent.py` + +Add one field to `Agent.__init__`: + +```python +self.cohort_ids: set[str] = set() # populated by SimulationEngine at startup and on resync +``` + +Add one helper method: + +```python +def can_interact(self, other: "Agent") -> bool: + """True if the two agents share at least one cohort (or if either has no cohort assignments).""" + if not self.cohort_ids or not other.cohort_ids: + return True # uncohorted agents interact with everyone — backward-compatible default + return bool(self.cohort_ids & other.cohort_ids) +``` + +The fallback `return True` when either agent has no cohorts assigned preserves all-vs-all behaviour for agents not yet assigned to any cohort, preventing accidental silencing. + +--- + +## Simulation Engine Changes + +### `src/agent/main.py` + +#### 1. Cohort Loading at Startup + +After agents are loaded and before the main loop, query cohort memberships: + +```python +async def _load_cohort_memberships(self): + async with self._session_factory() as db: + rows = await db.execute( + select(CohortMembership.agent_id, CohortMembership.cohort_id) + ) + # Clear and rebuild + for agent in self.agents.values(): + agent.cohort_ids = set() + for agent_id, cohort_id in rows: + if agent_id in self.agents: + self.agents[agent_id].cohort_ids.add(cohort_id) + + self._last_cohort_sync = time.time() + logger.info("Cohort memberships loaded for %d agents", sum(1 for a in self.agents.values() if a.cohort_ids)) +``` + +No index structure is needed — the interaction gate operates purely via `agent.cohort_ids` set intersection at the point of interaction. Turn dispatch is global and cohort-unaware (see Section 6). + +#### 2. Dynamic Membership Resync + +Every 60 seconds (checked at the top of each main-loop round), re-run `_load_cohort_memberships()` and rebuild `_cohort_members`. This is a full replace, not a diff — simple and correct. 
+ +```python +COHORT_RESYNC_INTERVAL = 60 # seconds + +if time.time() - self._last_cohort_sync >= COHORT_RESYNC_INTERVAL: + await _load_cohort_memberships() + _rebuild_cohort_index() +``` + +Resync only updates `agent.cohort_ids` and `_cohort_members`. It does not touch `AgentState` or close any active threads — existing open threads between agents who have since been removed from a shared cohort are allowed to conclude naturally. + +#### 3. Interaction Gate — Phase 2 + +In `_phase2_scan_filter()`, filter incoming posts before building the LLM prompt: + +```python +new_posts = [ + p for p in new_posts + if self._sender_can_interact(agent, p.sender_agent_id) +] +``` + +Where: + +```python +def _sender_can_interact(self, agent: Agent, sender_id: str | None) -> bool: + if sender_id is None: + return True # PI/human message — always show + sender = self.agents.get(sender_id) + if sender is None: + return True # unknown sender — don't filter + return agent.can_interact(sender) +``` + +#### 4. Interaction Gate — Phase 3 + +In `_phase3_activate_threads()`, tag-based and reply-based activation both check: + +```python +sender = self.agents.get(entry.sender_agent_id) +if sender and not agent.can_interact(sender): + continue # skip activation — not a cohort-mate +``` + +This applies before any other checks (thread cap, thread participation rules, etc.) to fail fast. + +#### 5. 
Interaction Gate — Phase 5 + +In `_phase5_new_post()`, when filtering `available_posts`: + +```python +sender = self.agents.get(post.sender_agent_id) +if sender and not agent.can_interact(sender): + agent.state.interesting_posts = [ + p for p in agent.state.interesting_posts if p.post_id != post.post_id + ] + continue # prune stale post — sender is no longer a cohort-mate +``` + +When the LLM response names a `tagged_agent` for a new top-level post: + +```python +if tagged_agent: + target = self.agents.get(tagged_agent) + if target and not agent.can_interact(target): + logger.debug("%s: cohort gate blocked tag of %s in phase5", agent.agent_id, tagged_agent) + return +``` + +#### 6. Turn Selection: Min-Heap + Global Semaphore + +Replace the current O(n) weighted-random `_select_agent()` with a **min-heap keyed by `last_selected`** and a **global semaphore of width `concurrent_turns`**. + +**Why min-heap over weighted random:** +The current weighted-random gives probabilistic fairness but can starve agents at large list sizes, particularly when `phase5_skip_probability` is non-zero (fast no-op turns let an agent re-enter the lottery immediately). A min-heap guarantees the longest-waiting eligible agent always gets the next slot — O(log n) selection, deterministic fairness. 
+ +**Selection and dispatch:** + +```python +import heapq + +def _build_heap(self) -> list[tuple[float, Agent]]: + now = time.time() + return [ + (a.state.last_selected, a) + for a in self.agents.values() + if not a.is_paused + and self._agent_within_budget(a) + and (now - a.state.last_selected) >= settings.turn_delay_seconds + ] + +async def _run_concurrent_turns(self) -> bool: + heap = self._build_heap() + if not heap: + return False + + heapq.heapify(heap) + n = min(settings.concurrent_turns, len(heap)) + selected = [heapq.heappop(heap)[1] for _ in range(n)] + + results = await asyncio.gather( + *[self._run_turn(agent) for agent in selected], + return_exceptions=True, + ) + + did_any_work = False + for agent, result in zip(selected, results): + agent.state.last_selected = time.time() + if isinstance(result, Exception): + logger.exception("Turn error for %s", agent.agent_id) + elif result: + did_any_work = True + + return did_any_work +``` + +The main loop calls `_run_concurrent_turns()` each iteration and uses `did_any_work` to drive the existing idle-backoff logic unchanged. + +**Slack polling** continues once per round, before `_run_concurrent_turns()`, as a single sequential operation. + +**`_last_llm_caller` guard:** This guard exists to prevent the same agent from making back-to-back LLM calls in the sequential model. It is superseded by the min-heap + per-agent cooldown (`turn_delay_seconds` eligibility check) and should be removed from the concurrent path. The min-heap naturally pushes a just-selected agent to the bottom of the queue; the cooldown makes them ineligible until the delay has elapsed. + +#### 7. Phase 5 Concurrent Initiation Guard + +With N turns running concurrently, two agents can independently decide to start a new thread with each other in the same round (both see `has_pending_reply=False` and neither has an active thread with the other yet). 
Track in-flight pair initiations to prevent duplicate thread creation: + +```python +self._initiating_pairs: set[frozenset[str]] = set() +``` + +In `_phase5_new_post()`, before posting a reply that opens a new thread toward `target_agent_id`: + +```python +pair = frozenset([agent.agent_id, target_agent_id]) +if pair in self._initiating_pairs: + logger.debug("%s: concurrent initiation guard blocked duplicate thread with %s", agent.agent_id, target_agent_id) + return + +self._initiating_pairs.add(pair) +try: + await self._post_message(...) + # activate thread ... +finally: + self._initiating_pairs.discard(pair) +``` + +The pair is removed once the thread is activated (or on failure). Note: Phase 4 back-and-forth replies are safe without this guard — `has_pending_reply` is a logical baton held by only one side at a time, so two agents cannot both have a pending reply to each other simultaneously. + +--- + +## Admin Interface + +### Routes + +All routes are added to `src/routers/admin.py` under the `/admin/cohorts` prefix, protected by the existing `get_admin_user` dependency. + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/admin/cohorts` | List all cohorts with member counts | +| POST | `/admin/cohorts/create` | Create a new cohort | +| GET | `/admin/cohorts/{cohort_id}` | Cohort detail: members, audit log | +| POST | `/admin/cohorts/{cohort_id}/delete` | Delete cohort (cascades memberships) | +| POST | `/admin/cohorts/{cohort_id}/add-agent` | Add an agent to the cohort | +| POST | `/admin/cohorts/{cohort_id}/remove-agent` | Remove an agent from the cohort | + +POST routes redirect back to the referring page on success and render an inline error on failure (same pattern as existing admin routes). + +### Cohort List Page — `GET /admin/cohorts` + +Template: `templates/admin/cohorts.html` + +**Header:** "Cohorts" with a "New Cohort" button (opens inline form or modal). 
+ +**Create form** (inline, collapsed by default): +- `name` (text input, required) — validated: lowercase, alphanumeric + hyphens only, max 48 chars +- `description` (textarea, optional) +- Submit → `POST /admin/cohorts/create` + +**Table: All Cohorts** + +| Column | Notes | +|--------|-------| +| Name | Link to detail page | +| Description | Truncated at 80 chars | +| Members | Count of current memberships | +| Created by | Admin user name | +| Created at | Date | +| Actions | Delete button (with confirmation; disabled if cohort has active members) | + +If no cohorts exist: empty state with "No cohorts yet. Create one above." + +### Cohort Detail Page — `GET /admin/cohorts/{cohort_id}` + +Template: `templates/admin/cohort_detail.html` + +**Header:** Cohort name + description. Delete button (top right, requires confirmation prompt via `data-confirm` attribute; only shown if member count is 0, otherwise disabled with tooltip "Remove all members first"). + +**Section: Members** + +Table of current members: + +| Column | Notes | +|--------|-------| +| Agent ID | e.g. `su`, `wiseman` | +| Bot Name | e.g. `SuBot` | +| PI Name | e.g. `Andrew Su` | +| Agent Status | `active` / `suspended` / `pending` (from AgentRegistry) | +| Added by | Admin user name | +| Added at | Date | +| Actions | "Remove" button → `POST /admin/cohorts/{cohort_id}/remove-agent` with `agent_id` | + +**Section: Add Agent** + +Dropdown of all agents *not already in this cohort*, populated from AgentRegistry. Only active agents are shown by default; a checkbox toggle shows suspended/pending agents as well. + +``` +[ Select agent ▼ ] [ Add to Cohort ] +``` + +`POST /admin/cohorts/{cohort_id}/add-agent` body: `{ agent_id: "su" }` + +If the selected agent already belongs to this cohort, return a 400 with inline error "Agent is already a member." 
+ +**Section: Agent Cohort Map (read-only)** + +Summary table showing all active agents and which cohorts they currently belong to, for cross-reference: + +| Agent | Cohorts | +|-------|---------| +| SuBot | pilot-wave-1, structural | +| WisemanBot | pilot-wave-1 | +| LotzBot | *(none)* | + +This section is static (no editing — use individual cohort pages to manage membership). + +### Navigation + +Add "Cohorts" to the existing admin sidebar nav alongside Agents, Users, Activity, etc. + +--- + +## Configuration + +### New settings (`src/config.py`) + +```python +concurrent_turns: int = 3 # max simultaneous agent turns; overridden by active_thread_threshold at runtime +``` + +At engine startup, `concurrent_turns` is clamped to `max(concurrent_turns, active_thread_threshold)`. This keeps the two levers in proportion: if an admin raises the thread threshold to allow more simultaneous conversations, the concurrent turn capacity rises with it automatically. The `concurrent_turns` setting therefore acts as a floor, not a ceiling. + +The cohort resync interval is hardcoded as `COHORT_RESYNC_INTERVAL = 60` seconds in the engine. It can be promoted to `Settings` if operational tuning is needed. + +### `turn_delay_seconds` — Behavior Change + +**Current behavior (to be removed):** `simulation.py:360-361` applies `asyncio.sleep(turn_delay_seconds)` at the end of every productive main-loop iteration. This is a **global pause** — no Slack polling, no other agents, nothing runs during the sleep. It is 0.0 by default and has no per-agent targeting. + +**New behavior:** `turn_delay_seconds` becomes a **per-agent cooldown** enforced at selection time inside `_build_heap()`: + +```python +and (now - a.state.last_selected) >= settings.turn_delay_seconds +``` + +An agent that just completed a turn is ineligible until the cooldown has elapsed. All other agents are unaffected. The `asyncio.sleep(settings.turn_delay_seconds)` call in `simulation.py` is removed. 
+ +This preserves the original intent (throttle individual agent tempo) while composing correctly with concurrent dispatch — N slots can stay busy while a recently-active agent sits out its cooldown. + +--- + +## Backward Compatibility + +- Agents with no cohort memberships have an empty `cohort_ids` set and continue to interact with every other agent, cohorted or not. This means a simulation with zero cohorts defined behaves identically to the current all-vs-all system. +- `Agent.can_interact()` returns `True` when either agent has an empty `cohort_ids` set, so partially-cohorted simulations (some agents assigned, some not) do not silently break. +- No existing tables, models, or routes are modified. + +--- + +## Out of Scope + +- Agent-visible cohort concept: agents do not know which cohort a conversation was initiated from; threads are indistinguishable. +- PI-managed cohorts: only admins create and delete cohorts. PIs cannot request cohort changes. +- Per-cohort budgets or limits: all limits remain per-agent and are shared across cohorts. +- Cohort-scoped message history or separate Slack workspaces per cohort. +- Time-bounded cohort memberships (automatic expiry). diff --git a/specs/data-model.md b/specs/data-model.md index bc3adc9..122a314 100644 --- a/specs/data-model.md +++ b/specs/data-model.md @@ -93,6 +93,25 @@ PostgreSQL-backed async job queue. | started_at | timestamp | Nullable | | completed_at | timestamp | Nullable | +### MatchmakerProposal + +Admin-generated collaboration proposals produced by a single LLM call from two PIs' public and private profiles, without running an agent simulation. See `labbot-matchmaker.md`. 
+ +| Field | Type | Notes | +|---|---|---| +| id | uuid | Primary key | +| pi_a_id | FK → User | First PI (CASCADE delete) | +| pi_b_id | FK → User | Second PI (CASCADE delete) | +| proposal_md | text | Full proposal in markdown | +| title | string(500) | Extracted from first `# heading` in proposal_md | +| confidence | string(20) | `high` / `moderate` / `speculative` | +| llm_model | string(100) | Model used (e.g. `claude-opus-4-7`) | +| input_tokens | integer | Nullable. Input token count | +| output_tokens | integer | Nullable. Output token count | +| generated_at | timestamp | Server default now() | + +**Indexes:** `pi_a_id`, `pi_b_id` + ### AccessAllowlist Admin-managed list of pre-approved ORCID IDs. ORCIDs on this list bypass the pre-release access gate and land directly in `allowed` state on first login. @@ -176,6 +195,42 @@ Stores PI/agent reviews of collaboration proposals. **Constraint:** Unique on (thread_decision_id, agent_id) — each agent reviews a thread decision once. +### PiProposalEvaluation + +Structured NIH-style evaluations submitted by PIs through the `/proposals` tab. Separate from `ProposalReview` (the 1–4 agent-blocking system). See `proposal-review-pi.md` for the full specification. + +| Field | Type | Notes | +|---|---|---| +| id | uuid | Primary key | +| user_id | FK → User | The PI who submitted the evaluation | +| proposal_type | string(20) | `"agent"` or `"matchmaker"` — stored for admin analysis only, never shown to evaluator | +| thread_decision_id | FK → ThreadDecision | Nullable. Set when evaluating an agent-generated proposal | +| matchmaker_proposal_id | FK → MatchmakerProposal | Nullable. 
Set when evaluating a matchmaker proposal | +| score_significance | smallint | 1–9 NIH criterion score | +| score_innovation | smallint | 1–9 NIH criterion score | +| score_approach | smallint | 1–9 NIH criterion score | +| score_investigators | smallint | 1–9 NIH criterion score | +| score_environment | smallint | 1–9 NIH criterion score | +| score_overall_impact | smallint | 1–9 holistic impact score (not an average of criteria) | +| comments_significance | text | Nullable. Free-text notes for Significance criterion | +| comments_innovation | text | Nullable. Free-text notes for Innovation criterion | +| comments_approach | text | Nullable. Free-text notes for Approach criterion | +| comments_investigators | text | Nullable. Free-text notes for Investigators criterion | +| comments_environment | text | Nullable. Free-text notes for Environment criterion | +| comments_overall | text | Required. Overall evaluation narrative | +| evaluated_at | timestamp | Server default now() | +| updated_at | timestamp | Nullable. Set on amendment | + +**Constraints:** +- `CHECK (proposal_type IN ('agent', 'matchmaker'))` +- All six scores: `CHECK (score_X BETWEEN 1 AND 9)` +- `CHECK (thread_decision_id IS NOT NULL OR matchmaker_proposal_id IS NOT NULL)` — at least one must be set; the intent is exactly one, but this `OR` check alone does not reject rows where both are set +- `UNIQUE (user_id, thread_decision_id)` and `UNIQUE (user_id, matchmaker_proposal_id)` — one evaluation per user per proposal; re-submission is an upsert (update in place) + +**Indexes:** `(user_id, proposal_type)`, `(thread_decision_id)`, `(matchmaker_proposal_id)` + +**Relationship to ProposalReview:** `ProposalReview` drives the agent-blocking workflow (1–4 scale, one row per agent per proposal). `PiProposalEvaluation` is a research-quality instrument for the PI's own assessment (NIH 1–9 scale, one row per PI per proposal). The two systems evolve independently. + ### EmailNotification Tracks each proposal notification email sent. See `email-proposal-review.md` for full spec. 
diff --git a/specs/labbot-matchmaker.md b/specs/labbot-matchmaker.md new file mode 100644 index 0000000..2d4d7a4 --- /dev/null +++ b/specs/labbot-matchmaker.md @@ -0,0 +1,247 @@ +# LabBot Matchmaker Specification + +## Overview + +The Matchmaker is an alternative pathway for generating collaboration proposals between two PIs without running the multi-agent simulation. An admin selects two PIs from dropdowns; a single LLM call reads both their public and private profiles and produces a proposal of identical quality and format to those generated by the agent dialogue system. The output is stored and displayed in a new **Matchmaker** tab in the admin dashboard. + +This is complementary to the agent system — not a replacement. It is useful for: +- Quickly generating proposals on demand before a simulation run +- Testing profile quality in isolation +- Generating proposals for PIs who don't yet have active agents + +--- + +## Admin UI (`/admin/matchmaker`) + +### Layout + +A new tab in the admin nav alongside "Discussions". + +**Top section — Generate form:** + +``` +[ PI A dropdown ▾ ] [ PI B dropdown ▾ ] [ Generate Proposal ] +``` + +Both dropdowns list all users who have a complete `ResearcherProfile`. Sorted alphabetically by name. The same user cannot be selected in both dropdowns (client-side enforcement — disable the selected user in the other dropdown). + +The **Generate Proposal** button is disabled until both PIs are selected. On click it POSTs to `/admin/matchmaker/generate` and shows an inline spinner while the LLM call runs (typically 5–15 seconds). + +**Main section — Proposals table:** + +Same visual style as `/admin/discussions`. 
+ +| Column | Notes | +|---|---| +| Date/time | When generated | +| PI A | Name | +| PI B | Name | +| Confidence | `High` / `Moderate` / `Speculative` badge (color-coded: green/yellow/gray) | +| Title | First line of the proposal | +| Actions | View · Delete | + +**Filters:** +- PI filter (multi-select, filters rows where either PI matches) +- Confidence filter + +**Export:** Each proposal row has an HTML and plain-text export link, same as Discussions. + +### Proposal Detail View (`/admin/matchmaker/{id}`) + +Full-page view of a single proposal. Sections mirror the proposal format (see Output Format below). Includes: +- Header: PI A, PI B, confidence label, generated timestamp +- Full proposal body rendered as markdown +- "Back to Matchmaker" link + +--- + +## Backend + +### New Route: `POST /admin/matchmaker/generate` + +**Request body (form):** `pi_a_id`, `pi_b_id` (user UUIDs) + +**Steps:** +1. Load `ResearcherProfile` for both users (including `private_profile_md`). +2. Load recent publications for both users (up to 20, sorted by year desc). +3. Call `generate_matchmaker_proposal(profile_a, profile_b, pubs_a, pubs_b)` — see LLM Pipeline below. +4. Parse LLM output, extract confidence label. +5. Insert a `MatchmakerProposal` row. +6. Redirect to `/admin/matchmaker` (or return JSON for HTMX — see implementation note below). + +**Error handling:** If either user lacks a complete profile, return a 400 with an inline error message above the form: "PI A / PI B does not have a complete profile yet." + +### New Route: `GET /admin/matchmaker` + +Renders the tab with the generate form and proposals table. + +### New Route: `GET /admin/matchmaker/{id}` + +Renders the proposal detail view. + +### New Route: `POST /admin/matchmaker/{id}/delete` + +Deletes the proposal row. Redirects back to `/admin/matchmaker`. (HTML forms do not support `DELETE`; a POST sub-resource is used instead.) 
+ +--- + +## Data Model + +### MatchmakerProposal + +| Field | Type | Notes | +|---|---|---| +| id | uuid | Primary key | +| pi_a_id | FK → User | | +| pi_b_id | FK → User | | +| proposal_md | text | Full proposal in markdown | +| title | text | Extracted first heading from proposal_md | +| confidence | string(20) | `high` / `moderate` / `speculative` | +| llm_model | string | Model used (e.g. `claude-opus-4-7`) | +| input_tokens | integer | | +| output_tokens | integer | | +| generated_at | timestamp | | + +`pi_a_id` and `pi_b_id` are stored in canonical order (lower UUID first) to avoid duplicate detection confusion. The UI always shows them in name-alphabetical order regardless. + +--- + +## LLM Pipeline + +### Model + +`claude-opus-4-7` — same model used for Phase 4 agent replies. + +### Prompt (`prompts/matchmaker.md`) + +``` +You are evaluating a potential research collaboration between two PIs. + +Your task is to produce a high-quality collaboration proposal that meets the +Collaboration Quality Standards below. + +--- +## Collaboration Quality Standards + +[Verbatim content from agent-system.md § "Collaboration Quality Standards"] + +--- +## PI A: {name_a} + +### Public Profile +{public_profile_a} + +### Private Instructions (confidential — do not quote directly) +{private_profile_a} + +### Recent Publications +{publications_a} + +--- +## PI B: {name_b} + +### Public Profile +{public_profile_b} + +### Private Instructions (confidential — do not quote directly) +{private_profile_b} + +### Recent Publications +{publications_b} + +--- +## Instructions + +Produce a collaboration proposal using the output format below. Apply the +Collaboration Quality Standards strictly. If you cannot identify a High or +Moderate confidence collaboration, produce a Speculative one and label it as such. + +Respect each PI's private instructions when framing the proposal: if a PI has +expressed preferences for specific topics or partners, weight those angles +positively. 
Do not quote or reveal any private instruction text in the output. +``` + +### Output Format + +The LLM output must follow this structure (identical to proposals generated by the agent dialogue system): + +```markdown +# [Collaboration Title] + +**Confidence:** High | Moderate | Speculative + +## Scientific Rationale +[2–3 paragraphs. Why these two labs? What does each bring that the other lacks? +Name specific techniques, datasets, reagents, or model systems.] + +## True Complementarity +- **PI A contributes:** [specific capabilities — not generic] +- **PI B contributes:** [specific capabilities — not generic] +- **Gap filled:** [what neither could do alone] + +## Concrete First Experiment +[1 paragraph. Scoped to days-to-weeks. Names specific assays, methods, +reagents, or datasets. Both labs are essential to execute it.] + +## Benefits to Each Lab +- **PI A benefits:** [specific, non-generic] +- **PI B benefits:** [specific, non-generic] + +## Open Questions / Next Steps +[Bullet list of what would need to be confirmed before committing effort] +``` + +The output format is enforced by wrapping the final proposal in `` tags (same pattern as `` in Phase 4). The parser extracts content inside those tags. The confidence label is parsed from the `**Confidence:**` line. + +--- + +## Admin Dashboard Integration + +### Nav update (`admin-dashboard.md` §API Routes additions) + +| Route | Purpose | +|---|---| +| `GET /admin/matchmaker` | Matchmaker tab with generate form and proposals table | +| `POST /admin/matchmaker/generate` | Run LLM pipeline and store result | +| `GET /admin/matchmaker/{id}` | Proposal detail view | +| `POST /admin/matchmaker/{id}/delete` | Delete a proposal | + +### Nav link + +Add **Matchmaker** between **Discussions** and **LLM Call Logs** in the admin sidebar. Show a count badge of total proposals (no filter). 
+ +--- + +## Relationship to Agent System + +| Dimension | Agent System | Matchmaker | +|---|---|---| +| Input | Multi-turn Slack dialogue | Public + private profiles only | +| Latency | Minutes to hours (simulation run) | ~10 seconds (single LLM call) | +| Proposal quality standard | Collaboration Quality Standards | Identical | +| Output format | Identical | Identical | +| Private profile respected | Yes (agent's own) | Yes (both, read-only, not quoted) | +| PI notification | Slack DM | None (admin-only tool) | +| Stored in | `Proposal` table | `MatchmakerProposal` table | + +Proposals from both sources can be exported in the same HTML/plain-text format. + +--- + +## Design Principles + +- **Admin-only.** The Matchmaker tab is only accessible to `is_admin = true` users. No self-service endpoint. +- **Read-only for PIs.** PIs are not notified. Admins use this to explore matches and manually share results. +- **No simulation dependency.** Works without any running agent containers or Slack tokens. +- **Same quality bar.** The Collaboration Quality Standards section from `agent-system.md` is embedded verbatim in the matchmaker prompt. No relaxed criteria. +- **Prompt caching.** The system prompt (quality standards + both profiles) is structured to maximize Anthropic prompt cache hits when the same pair is regenerated. + +--- + +## PI Evaluation of Matchmaker Proposals + +Matchmaker proposals surface to PIs through the unified **Proposal Evaluations** tab (`/proposals`) alongside agent-generated proposals. The origin is not revealed to the PI in that view. PIs submit NIH-style 1–9 evaluations via the form at `/proposals/{token}/evaluate`. + +Evaluations are stored in `pi_proposal_evaluations` with `proposal_type = "matchmaker"` and `matchmaker_proposal_id` set. Admins can see the origin breakdown (agent vs. matchmaker) at `/admin/evaluations`. See `proposal-review-pi.md` for the full specification. 
+ +**Note on CLI-created proposals:** Matchmaker proposals created via the CLI (which set `pi_a_name`/`pi_b_name` but leave `pi_a_id`/`pi_b_id` null) cannot be linked to a `user_id` and will not appear in the PI evaluation list. Admins should backfill the FK columns to include these proposals, or create them through the web UI form where FK resolution happens automatically. diff --git a/specs/labbot-podcast.md b/specs/labbot-podcast.md new file mode 100644 index 0000000..b6db1c1 --- /dev/null +++ b/specs/labbot-podcast.md @@ -0,0 +1,669 @@ +# LabBot Podcast Specification + +## Overview + +LabBot Podcast is a daily personalized research briefing service for researchers. It surfaces the single most relevant and impactful recent publication from the scientific literature based on the researcher's profile, generates a structured text summary highlighting findings and tools useful to their ongoing work, and produces a short audio episode via Mistral AI TTS. Researchers can subscribe to a personal RSS podcast feed to listen to the audio. + +The system runs once per day and requires no researcher interaction to be useful — but researchers can tune it through a web UI. There are two delivery paths: + +- **Agent path** — pilot-lab PIs with an approved `AgentRegistry` entry additionally receive the text summary as a Slack DM from their lab bot. +- **User path** — any researcher who has completed ORCID onboarding, has a `ResearcherProfile` with a research summary, and has opted in on the `/podcast/settings` page receives the podcast via their RSS feed (generation is disabled by default; see "User Opt-In / Opt-Out"). No Slack bot, agent approval, or admin action required. 
+ +--- + +## Architecture + +### Service Placement + +LabBot Podcast runs as a separate Docker container (`podcast` service), mirroring the GrantBot pattern: +- Long-running scheduler process +- Executes once per calendar day at 9am UTC (1 hour after GrantBot) +- If the container was down at the scheduled time, runs immediately on startup (catch-up) +- State persisted in `data/podcast_state.json` (tracks which articles have been delivered per agent) + +### Delivery Paths + +| Path | Who | Profile source | Delivery | Audio/RSS key | +|---|---|---|---|---| +| **Agent** | Pilot-lab PIs with active `AgentRegistry` | `profiles/public/{agent_id}.md` (disk) | Slack DM + RSS | `agent_id` string | +| **User** | Any ORCID user with completed `ResearcherProfile` | `ResearcherProfile` DB row (structured fields) | RSS only | `user_id` UUID | + +Both paths run in the same daily scheduler pass. A user who has both a `ResearcherProfile` and an active agent is handled only by the agent path (no duplicate episode). + +### Dependencies on Existing Systems + +| Existing component | How Podcast uses it | +|---|---| +| `ResearcherProfile` DB model | Source of research areas, keywords, techniques, disease areas for the user path | +| `profiles/public/{lab}.md` | Profile text for the agent path (LLM article selection and summary) | +| `src/services/pubmed.py` | Literature search (keyword + MeSH queries) | +| `src/services/llm.py` | Article selection ranking and summary generation (all calls logged to `LlmCallLog`) | +| `AgentRegistry` | Maps agent → PI → Slack bot token for DM delivery (agent path only) | +| `User.id` (UUID) | Stable, opaque RSS feed token for the user path | +| Slack bot DM | Text summary delivery (agent path only) | + +### New External Dependency + +**Mistral AI API** — text-to-speech generation. 
+- Configured via `MISTRAL_API_KEY` environment variable +- Voice selection per agent configured in `data/podcast_voices.json` (agent_id → voice_id); falls back to a default voice if not set +- Audio files stored at `data/podcast_audio/{agent_id}/{YYYY-MM-DD}.mp3` + +--- + +## Daily Pipeline + +Each day the scheduler runs two loops in sequence: + +1. **Agent loop** — iterates over all active `AgentRegistry` entries and calls `run_pipeline_for_agent()` for each. +2. **User loop** — iterates over all `User` rows where `onboarding_complete=True`, `profile.research_summary IS NOT NULL`, and `PodcastPreferences.podcast_enabled = True` (a missing preferences row counts as disabled), skipping any whose `user_id` appeared in the agent loop, and calls `run_podcast_for_user()` for each. + +For each recipient, the pipeline executes the following steps sequentially: + +### Step 1: Load Profile + +- **Agent path**: read `profiles/public/{agent_id}.md` from disk. If absent, skip. +- **User path**: construct profile text from structured `ResearcherProfile` DB fields (`research_summary`, `disease_areas`, `techniques`, `experimental_models`, `keywords`). If `research_summary` is empty, skip. + +### Step 2: Build Search Queries + +Construct PubMed search terms from the profile: +- Extract top research area keywords +- Extract technique and experimental model terms +- Combine into 2–3 PubMed query strings (e.g., `(proteostasis OR unfolded protein response) AND (neurodegeneration OR proteomics)`) +- Inject any `extra_keywords` from `PodcastPreferences` as additional quoted terms +- Limit to publications from the last 14 days (rolling window ensures coverage across weekend/holiday gaps) +- Cap at 50 candidate abstracts + +### Step 3: Fetch Candidate Abstracts + +Use `src/services/pubmed.py` to execute each query and retrieve PMIDs + abstracts. Deduplicate across queries. Skip any PMID already in `podcast_state.json` for this recipient (agent or user) to prevent re-delivering the same article. 
+ +### Step 4: LLM Article Selection (Sonnet) + +Single LLM call (Sonnet) with: +- The researcher's full profile text (disk for agent path; constructed from DB for user path) +- The list of candidate abstracts (title + abstract text, numbered) +- Any journal preferences from `PodcastPreferences` +- Prompt: `prompts/podcast-select.md` + +The LLM returns the index of the single best article, along with a one-sentence justification of why it is relevant to this researcher's ongoing work. If no article meets a minimum relevance threshold, it returns `null` and the pipeline skips delivery today. + +### Step 5: Generate Text Summary (Opus) + +One LLM call (Opus) with: +- The researcher's full profile text +- The selected article's title, abstract, and full text (fetched via `retrieve_full_text` if available in PMC, otherwise abstract only) +- Prompt: `prompts/podcast-summarize.md` + +Output is a structured text summary (see format below). This is used as the TTS input and stored in `PodcastEpisode.text_summary`. + +### Step 6: Generate Audio (Mistral AI) + +Pass the text summary to the Mistral AI TTS API: +- Voice: from `PodcastPreferences.voice_id`, or `MISTRAL_TTS_DEFAULT_VOICE` +- Model: configurable via `MISTRAL_TTS_MODEL` +- Output: MP3 file saved to: + - Agent path: `data/podcast_audio/{agent_id}/{YYYY-MM-DD}.mp3` + - User path: `data/podcast_audio/users/{user_id}/{YYYY-MM-DD}.mp3` +- If TTS fails, the episode DB row is **not** written (see commit-last ordering); the run returns `False`. + +### Step 7: Deliver via Slack DM _(agent path only)_ + +Send the text summary as a DM from the agent's Slack bot to its PI, appending the RSS feed URL. User-path episodes are delivered via RSS only — no Slack bot is required. + +### Step 8: Persist Episode and Update State + +1. Write the `PodcastEpisode` row to the DB: + - Agent path: `agent_id` set, `user_id` NULL + - User path: `user_id` set, `agent_id` NULL +2. 
Append the delivered PMID to `data/podcast_state.json` (keyed by `agent_id` or `user_id`) to prevent re-delivery. + +--- + +## Text Summary Format + +The Opus-generated summary follows a consistent structure. The prompt enforces this layout: + +``` +*Today's Research Brief — {Date}* + +*{Paper Title}* +{Authors} · {Journal} · {Year} + +*What they found:* +2–3 sentences on the core findings — specific results, effect sizes, or observations. + +*Key output:* +1–2 sentences on any tool, method, dataset, or reagent released with the paper (if applicable). Omit this section if the paper has no distinct output. + +*Why this matters for your lab:* +2–3 sentences connecting the paper's findings and outputs specifically to the PI's ongoing research areas, techniques, or open questions. Ground this in the PI's profile — name specific techniques, model systems, or questions from their work. + +*PubMed:* https://pubmed.ncbi.nlm.nih.gov/{PMID}/ +``` + +The Slack DM appends a line at the bottom: +> _Listen to the audio version: {rss_feed_url}_ + +--- + +## RSS Podcast Feed + +### Endpoints + +| Path | Auth | Key | +|---|---|---| +| `GET /podcast/{agent_id}/feed.xml` | None | Pilot-lab agent | +| `GET /podcast/{agent_id}/audio/{date}.mp3` | None | Pilot-lab agent | +| `GET /podcast/users/{user_id}/feed.xml` | None | Plain ORCID user | +| `GET /podcast/users/{user_id}/audio/{date}.mp3` | None | Plain ORCID user | + +All four endpoints are public and unauthenticated. The `user_id` UUID is opaque and acts as a stable, subscribable feed token — equivalent to a private podcast URL. Users retrieve their feed URL from the `/podcast/settings` page. + +### Feed Structure + +Standard RSS 2.0 with iTunes podcast extensions (identical structure for both paths): + +```xml + + + {Name} — LabBot Research Briefings + Daily personalized research summaries for {Name}. 
+ {feed_url} + {Name} + + + {Paper Title} — {Date} + {text summary} + + {RFC 822 date} + {agent_id|user-{user_id}}-{YYYY-MM-DD} + {duration} + + ... + + +``` + +### Audio File Storage + +| Path | Audio directory | +|---|---| +| Agent path | `data/podcast_audio/{agent_id}/{YYYY-MM-DD}.mp3` | +| User path | `data/podcast_audio/users/{user_id}/{YYYY-MM-DD}.mp3` | + +Files are streamed with `Content-Type: audio/mpeg`. + +--- + +## LLM Prompt Files + +Two new prompt files in `prompts/`: + +### `prompts/podcast-select.md` + +Instructs the LLM to act as a literature triage assistant for a specific PI. It receives: +- The PI's public profile (research areas, techniques, open questions, unique capabilities) +- Numbered list of candidate abstracts (title + abstract) + +It must return: +- The number of the most relevant article, or `null` if none clears the relevance bar +- A one-sentence justification referencing a specific aspect of the PI's profile + +Key instructions in the prompt: +- Relevance is defined as: the paper's findings or outputs could plausibly accelerate or inform a specific aspect of the PI's ongoing work +- Recency alone is not sufficient — the connection must be specific +- Prefer papers that release a tool, method, dataset, or reagent alongside findings +- Do not pick review articles or editorials + +### `prompts/podcast-summarize.md` + +Instructs the LLM to act as a science communicator writing for a specific PI. It receives: +- The PI's public profile +- Full paper text (or abstract if full text unavailable) + +It must produce the structured summary described above. 
Key instructions: +- The "Why this matters for your lab" section must name specific techniques, model systems, or open questions from the PI's profile — no generic connections +- Tone is like a knowledgeable postdoc briefing their PI: specific, direct, no filler +- The "Key output" section is only included if the paper releases a concrete artifact (tool, code, dataset, method, reagent); skip it otherwise +- Target length: ~250 words total + +--- + +## Data Model + +### `PodcastEpisode` + +Rows are keyed by either `agent_id` (string) or `user_id` (UUID FK to `users.id`). Exactly one should be set per row. + +```python +class PodcastEpisode(Base): + __tablename__ = "podcast_episodes" + + id: Mapped[uuid.UUID] = mapped_column(primary_key=True, default=uuid.uuid4) + agent_id: Mapped[str | None] = mapped_column(String(50), nullable=True, index=True) + user_id: Mapped[uuid.UUID | None] = mapped_column(UUID, ForeignKey("users.id"), nullable=True, index=True) + episode_date: Mapped[date] = mapped_column(Date, nullable=False) + pmid: Mapped[str] = mapped_column(String(100), nullable=False) + paper_title: Mapped[str] = mapped_column(String(500), nullable=False) + paper_authors: Mapped[str] = mapped_column(String(500), nullable=False) + paper_journal: Mapped[str] = mapped_column(String(255), nullable=False) + paper_year: Mapped[int] = mapped_column(Integer, nullable=False) + paper_url: Mapped[str | None] = mapped_column(String(1000), nullable=True) + text_summary: Mapped[str] = mapped_column(Text, nullable=False) + audio_file_path: Mapped[str | None] = mapped_column(String(500), nullable=True) + audio_duration_seconds: Mapped[int | None] = mapped_column(Integer, nullable=True) + slack_delivered: Mapped[bool] = mapped_column(Boolean, default=False) + selection_justification: Mapped[str] = mapped_column(Text, nullable=False) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + __table_args__ = ( + # Agent-path: one episode per 
agent per day + UniqueConstraint("agent_id", "episode_date", name="uq_podcast_agent_date"), + # User-path: enforced by partial unique index (migration 0013): + # CREATE UNIQUE INDEX ix_podcast_episodes_user_date + # ON podcast_episodes (user_id, episode_date) WHERE user_id IS NOT NULL + ) +``` + +### `PodcastPreferences` + +Rows are keyed by either `agent_id` or `user_id`. Both columns are nullable and uniquely indexed. + +```python +class PodcastPreferences(Base): + __tablename__ = "podcast_preferences" + + id: Mapped[uuid.UUID] = mapped_column(primary_key=True, default=uuid.uuid4) + agent_id: Mapped[str | None] = mapped_column(String(50), nullable=True, unique=True, index=True) + user_id: Mapped[uuid.UUID | None] = mapped_column(UUID, ForeignKey("users.id"), nullable=True, unique=True, index=True) + voice_id: Mapped[str | None] = mapped_column(String(100), nullable=True) + extra_keywords: Mapped[list[str]] = mapped_column(ARRAY(String), server_default="{}") + preferred_journals: Mapped[list[str]] = mapped_column(ARRAY(String), server_default="{}") + deprioritized_journals: Mapped[list[str]] = mapped_column(ARRAY(String), server_default="{}") + updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now()) +``` + +### State File (`data/podcast_state.json`) + +Keyed separately for agents and users: + +```json +{ + "agents": { + "": { "delivered_pmids": ["12345", "67890"] } + }, + "users": { + "": { "delivered_pmids": ["11111"] } + }, + "last_run_date": "2026-04-14" +} +``` + +The state file is a lightweight deduplication cache. The DB is the authoritative record for RSS generation and admin visibility. 
+ +### Alembic Migrations + +| Migration | Creates / alters | +|---|---| +| `0010_add_podcast_episodes.py` | `podcast_episodes` table (agent path) | +| `0011_add_podcast_paper_url.py` | `paper_url` column | +| `0012_add_podcast_preferences.py` | `podcast_preferences` table (agent path) | +| `0013_podcast_user_support.py` | `user_id` FK on both tables; make `agent_id` nullable; partial unique index for user-path episodes | + +--- + +## Configuration + +New environment variables: + +| Variable | Required | Description | +|---|---|---| +| `MISTRAL_API_KEY` | Yes (for audio) | Mistral AI API key | +| `MISTRAL_TTS_MODEL` | No | TTS model ID (default: `mistral-tts-latest`) | +| `MISTRAL_TTS_DEFAULT_VOICE` | No | Default voice when no per-agent override exists | +| `PODCAST_BASE_URL` | Yes | Public base URL for RSS enclosure links (e.g., `https://copi.science`) | +| `PODCAST_SEARCH_WINDOW_DAYS` | No | Rolling search window in days (default: `14`) | +| `PODCAST_MAX_CANDIDATES` | No | Max PubMed abstracts per agent per day (default: `50`) | + +Per-agent voice overrides (Phase 2/3): `data/podcast_voices.json` +```json +{ + "su": "alex", + "wiseman": "stella" +} +``` +**Deprecated in Phase 4** — voice preferences move to the `podcast_preferences` DB table. The JSON file is still read as a fallback while the migration is in progress. + +--- + +## Docker Service + +Add `podcast` service to `docker-compose.yml` and `docker-compose.prod.yml`: + +```yaml +podcast: + build: . 
+ command: python -m src.podcast.main + env_file: .env + volumes: + - ./data:/app/data + depends_on: + - postgres + profiles: + - podcast +``` + +Run with: `docker compose --profile podcast up -d podcast` + +--- + +## Module Structure + +``` +src/podcast/ +├── main.py # Scheduler entry point (APScheduler, same pattern as grantbot.py) +├── pipeline.py # Per-agent pipeline (steps 1–8 above) +├── pubmed_search.py # Query builder from ResearcherProfile +├── mistral_tts.py # Mistral AI TTS client wrapper +├── rss.py # RSS feed builder (reads from DB) +└── state.py # podcast_state.json read/write helpers + +src/routers/podcast.py # FastAPI routes: /podcast/{agent_id}/feed.xml, /podcast/{agent_id}/audio/{date}.mp3 +``` + +The scheduler in `src/podcast/main.py` follows the same catch-up-on-startup pattern as `src/agent/grantbot.py`: +1. On startup, check `data/podcast_state.json` for last run timestamp +2. If last run was before today's 9am UTC, run immediately +3. Schedule next run at 9am UTC + +--- + +## Admin Dashboard Integration + +Add a **Podcast** tab to the existing admin dashboard (`src/routers/admin.py` + `templates/admin.html`) showing: +- Table of recent episodes: agent, date, paper title, PMID, Slack delivered (yes/no), audio generated (yes/no) +- Link to each agent's RSS feed +- LLM call counts and token usage for the podcast pipeline (pulled from `LlmCallLog` filtered by `source = "podcast"`) + +The LLM calls from the podcast pipeline should set a `source` tag in `LlmCallLog` (add a `source` column via migration if not already present, or use the existing `extra_metadata` JSONB field). + +--- + +## PI Customization + +### Via Standing Instructions (Current) + +PIs can adjust podcast behavior through standing instructions to their lab bot (same DM mechanism as the agent system — see `pi-interaction.md`). The podcast pipeline reads the private profile when building the selection prompt. 
+ +Examples of effective standing instructions: +- "For my daily podcast, focus only on papers that release a new tool or dataset — I don't need summaries of pure wet-lab findings" +- "Prioritize papers from computational biology journals for the podcast" +- "Skip anything about C. elegans — we're not pursuing that direction anymore" + +The bot's private profile rewrite (via `prompts/pi-profile-rewrite.md`) should include a `## Podcast Preferences` section that the podcast pipeline reads when constructing the selection and summarization prompts. + +### Via Preferences UI (Phase 4) + +A structured preferences page at `/agent/{agent_id}/podcast-settings` replaces the `data/podcast_voices.json` file and augments the standing-instructions mechanism with three explicit controls: + +1. **Voice** — select the TTS voice used for audio generation +2. **Extra search keywords** — additional terms appended to PubMed/preprint queries beyond the auto-extracted profile keywords +3. **Source preferences** — journals or preprint servers to prioritize (boosted in the selection prompt) or deprioritize + +See the **Podcast Preferences UI** section below for the full design. + +--- + +## User Opt-In / Opt-Out + +### Default State + +Podcast generation is **disabled by default** for all plain ORCID users. An eligible user (onboarded with a completed profile) must explicitly opt in from the `/podcast/settings` page before any episode is generated for them. This prevents unsolicited audio generation for users who signed up but have not engaged with the feature. + +The agent path is unaffected — pilot-lab agents are always enabled as long as their `AgentRegistry.status == "active"`. Only the user path is gated by the opt-in flag. + +### Storage + +The opt-in flag is stored as `podcast_enabled: bool` on the `PodcastPreferences` row, defaulting to `False`. 
Because `PodcastPreferences` is created on first save, the scheduler treats both "no row exists" and "`podcast_enabled = False`" as disabled. + +```python +class PodcastPreferences(Base): + ... + podcast_enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default="false") +``` + +Migration: `0022_add_podcast_enabled.py` adds `podcast_enabled BOOLEAN NOT NULL DEFAULT false` to `podcast_preferences`. + +### Enforcement Points + +| Location | Check | +|---|---| +| Daily scheduler (`src/podcast/main.py`) | Skip user unless `PodcastPreferences.podcast_enabled = True` | +| On-demand trigger (`POST /podcast/user/generate`) | Return `403` unless user has `podcast_enabled = True` | + +The `_podcast_eligible()` helper in `src/routers/podcast.py` remains unchanged — it checks profile completeness only. The `podcast_enabled` flag is checked separately in the scheduler and on-demand endpoint. + +### Settings UI + +The `/podcast/settings` page shows the enable/disable toggle **at the top of the form**, before any other preferences. The feed URL card and all preference fields are always visible so users can inspect their URL and configure preferences before enabling. The toggle takes immediate effect on save. + +- **Disabled state**: toggle is off; the feed URL card notes that generation is paused. +- **Enabled state**: toggle is on; generation runs in the daily scheduler at 9am UTC. 
+ +### Alembic Migration + +```python +# 0022_add_podcast_enabled.py +def upgrade() -> None: + op.add_column( + "podcast_preferences", + sa.Column("podcast_enabled", sa.Boolean(), nullable=False, server_default="false"), + ) + +def downgrade() -> None: + op.drop_column("podcast_preferences", "podcast_enabled") +``` + +--- + +## Podcast Preferences UI + +### Route and Access Control + +| Route | Method | Handler | Access | Notes | +|---|---|---|---|---| +| `/agent/{agent_id}/podcast-settings` | `GET` | Render agent preferences form | Agent owner or admin | Agent path | +| `/agent/{agent_id}/podcast-settings` | `POST` | Save agent preferences | Agent owner or admin | Agent path | +| `/podcast/settings` | `GET` | Render user preferences form | Any authenticated user with completed profile | User path | +| `/podcast/settings` | `POST` | Save user preferences | Any authenticated user with completed profile | User path | +| `/podcast/user/generate` | `POST` | Trigger on-demand episode | Any authenticated user with completed profile | User path | + +The agent-path routes remain in `src/routers/agent_page.py` with the same `get_agent_with_access()` ownership check. The user-path routes live in `src/routers/podcast.py` and use `get_current_user()` + a profile-completeness check (`onboarding_complete=True` and `profile.research_summary IS NOT NULL`). + +### User Feed URL + +After saving preferences or visiting `/podcast/settings`, the user sees their personal feed URL: + +``` +{PODCAST_BASE_URL}/podcast/users/{user.id}/feed.xml +``` + +This URL: +- Requires no authentication to read (subscribe in any podcast app) +- Is stable for the lifetime of the user account +- Acts as an opaque token — not guessable, not secret, but not publicly listed +- Is displayed with a one-click copy button on the settings page + +### Form Fields + +#### 1. Voice Selection + +A ` - - + {% endfor %} + + {% endif %} - - + + +
+
+
+ {% set ratings = [ + (1, "Not a good idea", "Not interesting OR multiple major weaknesses"), + (2, "Good idea", "Medium interest OR one major weakness"), + (3, "Great idea", "High interest, minor weaknesses only"), + (4, "Excellent idea", "High interest, no notable weaknesses"), + ] %} + {% for val, label, desc in ratings %} + + {% endfor %} +
+ + +
+
+ + + @@ -222,6 +297,13 @@

Reviewed Proposals

View and edit your agent's private behavioral profile.

+ + +
Podcast Settings
+

Customize your daily research briefing: voice, extra keywords, and journal preferences.

+
+ @@ -345,8 +427,31 @@

Reviewed Proposals

}); }); +function toggleProposal(idx, total) { + for (var i = 1; i <= total; i++) { + var body = document.getElementById('prop-body-' + i); + var chev = document.getElementById('prop-chevron-' + i); + if (!body) continue; + if (i === idx) { + var opening = body.classList.toggle('hidden'); + if (chev) chev.style.transform = opening ? 'rotate(180deg)' : ''; + } else { + body.classList.add('hidden'); + if (chev) chev.style.transform = 'rotate(180deg)'; + } + } +} + +function toggleDiscussion(btn, panelId) { + var panel = document.getElementById(panelId); + var label = btn.querySelector('.disc-label'); + var arrow = btn.querySelector('.disc-arrow'); + var hidden = panel.classList.toggle('hidden'); + label.textContent = hidden ? 'Show discussion' : 'Hide discussion'; + if (arrow) arrow.style.transform = hidden ? '' : 'rotate(180deg)'; +} + function showTab(btn, panelId) { - // Find sibling tab buttons and panels within the same proposal card var container = btn.closest('.border-t'); var buttons = container.querySelectorAll('.tab-btn'); var panels = container.querySelectorAll('.tab-panel'); diff --git a/templates/agent/podcast_settings.html b/templates/agent/podcast_settings.html new file mode 100644 index 0000000..cdd5d58 --- /dev/null +++ b/templates/agent/podcast_settings.html @@ -0,0 +1,126 @@ +{% extends "base.html" %} +{% block title %}Podcast Settings — {{ agent.bot_name }} — CoPI{% endblock %} + +{% block content %} +
+
+
+ ← My Agent +

Podcast Settings

+

Customize your daily research briefing for {{ agent.bot_name }}

+
+
+ + {% if saved %} +
+ Preferences saved successfully. +
+ {% endif %} + +
+ + + +
+
+

Enable Podcast

+

+ When enabled, a new research briefing is generated daily and sent via Slack DM. +

+
+ +
+ + +
+

Voice

+

+ Select the text-to-speech voice used for your audio episodes. + Voices are from Mistral AI's voxtral-mini-tts-latest model. +

+ +
+ + +
+

Extra Search Keywords

+

+ Additional terms to include in the daily literature search, beyond what is + auto-extracted from your lab profile. One keyword or phrase per line (max 20). + These are added as quoted PubMed search terms. +

+ +

+ {% if prefs and prefs.extra_keywords %}{{ prefs.extra_keywords | length }} keyword(s) saved.{% else %}No extra keywords set.{% endif %} +

+
+ + +
+

Source Preferences

+

+ Guide the article selection by telling the AI which journals or preprint servers + to prioritize or avoid. One source per line (or comma-separated). +

+ +
+ + +
+ +
+ + +
+
+ +
+ + + Cancel + +
+

+ Changes take effect on the next scheduled podcast run (daily at 9am UTC). +

+
+
+{% endblock %} diff --git a/templates/base.html b/templates/base.html index 74db818..afb97a0 100644 --- a/templates/base.html +++ b/templates/base.html @@ -55,6 +55,14 @@ {% endif %} + + Proposals + + + Podcast + {% if current_user.is_admin and not impersonation_banner %} @@ -85,9 +93,12 @@ Jobs Activity Discussions + Matchmaker Agents + Podcast Access Waitlist + Evaluations @@ -134,6 +145,9 @@ } else if (fmt === 'short') { el.textContent = d.toLocaleDateString([], {month: 'short', day: 'numeric'}) + ' ' + d.toLocaleTimeString([], {hour: '2-digit', minute: '2-digit'}); + } else if (fmt === 'disc') { + el.textContent = d.toLocaleDateString([], {weekday: 'long'}) + ' ' + + d.toLocaleTimeString([], {hour: 'numeric', minute: '2-digit'}); } else { el.textContent = d.toLocaleDateString([], {month: 'short', day: 'numeric', year: 'numeric'}) + ' ' + d.toLocaleTimeString([], {hour: '2-digit', minute: '2-digit'}); diff --git a/templates/podcast_settings.html b/templates/podcast_settings.html new file mode 100644 index 0000000..18d658e --- /dev/null +++ b/templates/podcast_settings.html @@ -0,0 +1,211 @@ +{% extends "base.html" %} +{% block title %}Podcast Settings — CoPI{% endblock %} + +{% block content %} +
+
+
+ ← My Profile +

Podcast Settings

+

Customize your daily LabBot research briefing

+
+
+ + {% if saved %} +
+ Preferences saved successfully. +
+ {% endif %} + + +
+

Your Podcast Feed URL

+

+ Subscribe to this URL in any podcast app (Apple Podcasts, Overcast, Pocket Casts, etc.) + to receive audio episodes automatically. The URL is stable and does not require login. +

+
+ + +
+ +

+ {% if podcast_enabled %} + New episodes are generated daily at 9am UTC. You can also + . + {% else %} + Enable the podcast below to start receiving daily episodes. + {% endif %} +

+ +
+ + + {% if not podcast_enabled %} +
+ Podcast generation is currently disabled. Enable it below to start receiving daily briefings. +
+ {% endif %} + +
+ + + +
+
+

Enable Podcast

+

+ When enabled, a new research briefing is generated daily at 9am UTC and added to your feed. +

+
+ +
+ + +
+

Voice

+

+ Select the text-to-speech voice used for your audio episodes. + Voices are from Mistral AI's voxtral-mini-tts-latest model. +

+ +
+ + +
+

Extra Search Keywords

+

+ Additional terms to include in the daily literature search, beyond what is + auto-extracted from your profile. One keyword or phrase per line (max 20). + These are added as quoted PubMed search terms. +

+ +

+ {% if prefs and prefs.extra_keywords %}{{ prefs.extra_keywords | length }} keyword(s) saved.{% else %}No extra keywords set.{% endif %} +

+
+ + +
+

Source Preferences

+

+ Guide the article selection by telling the AI which journals or preprint servers + to prioritize or avoid. One source per line (or comma-separated). +

+ +
+ + +
+ +
+ + +
+
+ +
+ + + Cancel + +
+

+ Changes take effect on the next scheduled podcast run (daily at 9am UTC). +

+
+ + {% if recent_episodes %} + +
+

Recent Episodes

+ +
+ {% endif %} +
+{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/profile/view.html b/templates/profile/view.html index 0bbc22d..e4b2d53 100644 --- a/templates/profile/view.html +++ b/templates/profile/view.html @@ -29,6 +29,16 @@

{{ user.name }}

+ {% if podcast_incomplete %} +
+

Podcast not available yet

+

+ Your research profile needs to be generated before you can access Podcast settings. + Once your profile is ready you can visit Podcast Settings. +

+
+ {% endif %} + {% if just_completed_onboarding %}

Welcome to CoPI!

diff --git a/templates/proposals/evaluate.html b/templates/proposals/evaluate.html new file mode 100644 index 0000000..8df9b9b --- /dev/null +++ b/templates/proposals/evaluate.html @@ -0,0 +1,168 @@ +{% extends "base.html" %} +{% block title %}Evaluate Proposal — CoPI{% endblock %} + +{% block extra_head %} + + +{% endblock %} + +{% block content %} +
+ + +
+ + ← Back to proposals + + {% if group_token is defined and total_steps is defined and total_steps > 1 %} + + {{ step + 1 }} of {{ total_steps }} + + {% endif %} +
+ + +
+

{{ title }}

+

Proposed collaboration with {{ collaborator }}

+
+
{{ body_md }}
+
+ + +
+ + Show scoring guide (NIH 1–9 scale) ▾ + +
+

+ Scores are whole numbers from 1 (best) to 9 (worst). + Use the full range — scores of 1 and 9 are expected to be rare. +

+ + + + + + + + + + + + + + + + + + + +
ScoreDescriptorStrengths / Weaknesses
1ExceptionalEssentially no weaknesses
2OutstandingNegligible weaknesses
3ExcellentOnly minor weaknesses
4Very GoodNumerous minor weaknesses
5GoodAt least one moderate weakness
6SatisfactorySome moderate weaknesses
7FairAt least one major weakness
8MarginalA few major weaknesses
9PoorNumerous major weaknesses
+

+ Scores 1–3 = high impact; 4–6 = moderate; 7–9 = low. +

+
+
+ + + {% if group_token is defined %} +
+ {% else %} + + {% endif %} + + + {% set existing_impact = evaluation.score_overall_impact if evaluation else none %} +
+

Overall Impact Score

+

+ Your holistic assessment of the likelihood that this collaboration would exert a + sustained, powerful influence on the research field(s) involved. + A proposal need not be strong in every dimension to earn a high impact score. +

+ +
+
+ {% for s in range(1, 10) %} +
+ + +
+ {% endfor %} +
+
+ Exceptional + Poor +
+
+ + + +
+ +
+ Cancel + {% if group_token is defined and step + 1 < total_steps %} + + {% else %} + + {% endif %} +
+ + {% if evaluation and evaluation.updated_at %} +

+ Last updated {{ evaluation.updated_at.strftime('%b %d %H:%M') }} +

+ {% elif evaluation %} +

+ Submitted {{ evaluation.evaluated_at.strftime('%b %d %H:%M') }} +

+ {% endif %} + +
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/proposals/list.html b/templates/proposals/list.html new file mode 100644 index 0000000..2f4414e --- /dev/null +++ b/templates/proposals/list.html @@ -0,0 +1,75 @@ +{% extends "base.html" %} +{% block title %}Collaboration Proposals — CoPI{% endblock %} + +{% block content %} +
+
+

Collaboration Proposals

+

+ Evaluate proposals involving your lab using the NIH 1–9 impact scale. +

+
+ + {% if flash_message %} +
+ {{ flash_message }} +
+ {% endif %} + + {% if groups %} +
+ {% for g in groups %} +
+
+

With + {% if g.collaborator_agent_id %} + {{ g.collaborator }} + {% else %} + {{ g.collaborator }} + {% endif %} +

+

+ {{ g.total }} proposal{{ 's' if g.total != 1 else '' }} + {% if g.evaluated_count > 0 and not g.all_evaluated %} + · {{ g.evaluated_count }} of {{ g.total }} evaluated + {% endif %} +

+
+
+ {% if g.all_evaluated %} + + All evaluated + + + Review + + {% else %} + + Evaluate + + {% endif %} +
+
+ {% endfor %} +
+ +

+ {{ groups | length }} collaborator{{ 's' if groups | length != 1 else '' }} +

+ + {% else %} +
+

No collaboration proposals yet.

+

+ Proposals will appear here once your agent has completed discussions or + the admin has generated a Matchmaker proposal involving you. +

+
+ {% endif %} +
+{% endblock %} diff --git a/templates/researcher/view.html b/templates/researcher/view.html new file mode 100644 index 0000000..de7f8c0 --- /dev/null +++ b/templates/researcher/view.html @@ -0,0 +1,103 @@ +{% extends "base.html" %} +{% block title %}{{ pi_user.name }} — CoPI{% endblock %} + +{% block content %} +
+
+

{{ pi_user.name }}

+ {% if pi_user.institution %} +

{{ pi_user.institution }}

+ {% endif %} + {% if pi_user.department %} +

{{ pi_user.department }}

+ {% endif %} +
+ + {% if profile and profile.research_summary %} +
+

Research Summary

+

{{ profile.research_summary }}

+
+ + {% if profile.techniques or profile.experimental_models or profile.disease_areas or profile.key_targets or profile.keywords %} +
+
+ {% if profile.techniques %} +
+

Techniques & Methods

+
+ {% for t in profile.techniques %} + {{ t }} + {% endfor %} +
+
+ {% endif %} + + {% if profile.experimental_models %} +
+

Model Systems

+
+ {% for m in profile.experimental_models %} + {{ m }} + {% endfor %} +
+
+ {% endif %} + + {% if profile.disease_areas %} +
+

Disease Areas

+
+ {% for d in profile.disease_areas %} + {{ d }} + {% endfor %} +
+
+ {% endif %} + + {% if profile.key_targets %} +
+

Key Molecular Targets

+
+ {% for k in profile.key_targets %} + {{ k }} + {% endfor %} +
+
+ {% endif %} + + {% if profile.keywords %} +
+

Keywords

+
+ {% for kw in profile.keywords %} + {{ kw }} + {% endfor %} +
+
+ {% endif %} +
+
+ {% endif %} + + {% else %} +
+ No public profile content yet. +
+ {% endif %} +
+ + +{% endblock %} diff --git a/testpairs2.tsv b/testpairs2.tsv new file mode 100644 index 0000000..cbfed1d --- /dev/null +++ b/testpairs2.tsv @@ -0,0 +1,12 @@ +Briney Paulson +Kim Paulson +Paulson Azumaya +Paulson Briney +Paulson Forli +Paulson Mravic +Paulson Petrascheck +Paulson Pwu +Paulson Zaro +Susa Paulson +Ward Paulson +Wilson Paulson diff --git a/tests/test_podcast.py b/tests/test_podcast.py new file mode 100644 index 0000000..76e6138 --- /dev/null +++ b/tests/test_podcast.py @@ -0,0 +1,343 @@ +"""Unit tests for podcast pipeline pure-logic functions and RSS builder.""" + +import json +import os +import tempfile +from datetime import date +from pathlib import Path +from types import SimpleNamespace + +import pytest + +from src.podcast.pubmed_search import build_queries +from src.podcast.pipeline import ( + _format_candidates_for_prompt, + _extract_section_text, + _build_profile_text_from_db, +) +from src.podcast.rss import build_feed +from src.podcast.state import ( + get_delivered_pmids, + record_delivery, + get_delivered_pmids_for_user, + record_delivery_for_user, +) + + +# --------------------------------------------------------------------------- +# build_queries +# --------------------------------------------------------------------------- + +class TestBuildQueries: + def test_disease_areas_produce_query(self): + profile = {"disease_areas": ["neurodegeneration", "Alzheimer's disease"], "techniques": [], "experimental_models": [], "keywords": []} + queries = build_queries(profile) + assert len(queries) >= 1 + assert "neurodegeneration" in queries[0] + + def test_techniques_produce_second_query(self): + profile = { + "disease_areas": ["cancer"], + "techniques": ["CRISPR", "flow cytometry"], + "experimental_models": [], + "keywords": [], + } + queries = build_queries(profile) + assert len(queries) >= 2 + assert any("CRISPR" in q for q in queries) + + def test_keywords_produce_third_query(self): + profile = { + "disease_areas": ["diabetes"], + 
"techniques": ["proteomics"], + "experimental_models": [], + "keywords": ["insulin signaling", "beta cell"], + } + queries = build_queries(profile) + assert len(queries) >= 3 + assert any("insulin signaling" in q or "beta cell" in q for q in queries) + + def test_empty_profile_returns_empty(self): + queries = build_queries({}) + assert queries == [] + + def test_fallback_to_research_summary(self): + profile = {"research_summary": "Studying ribosome biogenesis mechanisms"} + queries = build_queries(profile) + assert len(queries) == 1 + + def test_queries_are_quoted_terms(self): + profile = {"disease_areas": ["proteostasis"], "techniques": [], "experimental_models": [], "keywords": []} + queries = build_queries(profile) + assert '"proteostasis"' in queries[0] + + +# --------------------------------------------------------------------------- +# _format_candidates_for_prompt +# --------------------------------------------------------------------------- + +class TestFormatCandidates: + def test_numbers_candidates_from_one(self): + records = [ + {"title": "Paper A", "abstract": "Abstract A", "journal": "Nature", "year": 2024}, + {"title": "Paper B", "abstract": "Abstract B", "journal": "Science", "year": 2024}, + ] + text = _format_candidates_for_prompt(records) + assert text.startswith("1.") + assert "2." in text + + def test_includes_title_and_abstract(self): + records = [{"title": "CRISPR therapy", "abstract": "We developed a new approach.", "journal": "Cell", "year": 2025}] + text = _format_candidates_for_prompt(records) + assert "CRISPR therapy" in text + assert "We developed a new approach." 
in text + + def test_truncates_long_abstract(self): + long_abstract = "x" * 1000 + records = [{"title": "T", "abstract": long_abstract, "journal": "J", "year": 2024}] + text = _format_candidates_for_prompt(records) + assert len(text) < 1000 # abstract truncated to 600 chars + + def test_handles_missing_fields(self): + records = [{"title": "Minimal record"}] + text = _format_candidates_for_prompt(records) + assert "Minimal record" in text + assert "No abstract" in text + + +# --------------------------------------------------------------------------- +# _extract_section_text +# --------------------------------------------------------------------------- + +class TestExtractSectionText: + SAMPLE_MD = """## Research Summary +We study protein folding in neurons. + +## Key Methods and Technologies +- Cryo-EM +- Mass spectrometry + +## Podcast Preferences +Focus on computational tools only. +""" + + def test_extracts_research_summary(self): + text = _extract_section_text(self.SAMPLE_MD, "Research Summary") + assert "protein folding" in text + + def test_extracts_podcast_preferences(self): + text = _extract_section_text(self.SAMPLE_MD, "Podcast Preferences") + assert "computational tools" in text + + def test_stops_at_next_section(self): + text = _extract_section_text(self.SAMPLE_MD, "Research Summary") + assert "Cryo-EM" not in text + + def test_missing_section_returns_empty(self): + text = _extract_section_text(self.SAMPLE_MD, "Nonexistent Section") + assert text == "" + + +# --------------------------------------------------------------------------- +# RSS feed builder +# --------------------------------------------------------------------------- + +def _make_episode(**kwargs): + """Create a minimal PodcastEpisode-like object for RSS tests.""" + defaults = dict( + episode_date=date(2026, 4, 10), + paper_title="A Great Paper", + paper_authors="Smith J et al.", + paper_journal="Nature", + paper_year=2026, + pmid="12345678", + paper_url=None, + text_summary="This paper 
found something important.", + audio_file_path=None, + audio_duration_seconds=None, + slack_delivered=True, + selection_justification="Highly relevant to the PI's work.", + ) + defaults.update(kwargs) + return SimpleNamespace(**defaults) + + +class TestBuildFeed: + # --- agent path --- + + def test_returns_valid_xml_root(self): + xml = build_feed("Jane Smith", [], "https://example.com", agent_id="testagent") + assert xml.startswith("1:30" in xml + + def test_no_enclosure_when_no_audio(self): + ep = _make_episode(audio_file_path=None) + xml = build_feed("Jane Smith", [ep], "https://example.com", agent_id="testagent") + assert "") + xml = build_feed("Jane Smith", [ep], "https://example.com", agent_id="testagent") + assert "Proteins & <Stuff>" in xml + + def test_empty_episodes_list(self): + xml = build_feed("Jane Smith", [], "https://example.com", agent_id="testagent") + assert "" not in xml + + def test_agent_guid_format(self): + ep = _make_episode() + xml = build_feed("Jane Smith", [ep], "https://example.com", agent_id="testagent") + assert "testagent-2026-04-10" in xml + + # --- user path --- + + def test_user_feed_url_uses_user_id(self): + uid = "11111111-2222-3333-4444-555555555555" + xml = build_feed("Alice Brown", [], "https://example.com", user_id=uid) + assert f"/podcast/users/{uid}/feed.xml" in xml + + def test_user_feed_has_correct_pi_name(self): + uid = "11111111-2222-3333-4444-555555555555" + xml = build_feed("Alice Brown", [], "https://example.com", user_id=uid) + assert "Alice Brown" in xml + + def test_user_audio_url_uses_user_path(self, tmp_path): + uid = "11111111-2222-3333-4444-555555555555" + audio_file = tmp_path / "2026-04-10.mp3" + audio_file.write_bytes(b"\x00" * 500) + ep = _make_episode(audio_file_path=str(audio_file)) + xml = build_feed("Alice Brown", [ep], "https://example.com", user_id=uid) + assert f"/podcast/users/{uid}/audio/2026-04-10.mp3" in xml + + def test_user_guid_format(self): + uid = "11111111-2222-3333-4444-555555555555" + ep 
= _make_episode() + xml = build_feed("Alice Brown", [ep], "https://example.com", user_id=uid) + assert f"user-{uid}-2026-04-10" in xml + + +# --------------------------------------------------------------------------- +# State helpers — user path +# --------------------------------------------------------------------------- + +class TestUserState: + def test_new_user_has_empty_delivered_set(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.podcast.state.STATE_FILE", tmp_path / "state.json") + result = get_delivered_pmids_for_user("user-uuid-abc") + assert result == set() + + def test_record_and_retrieve_user_delivery(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.podcast.state.STATE_FILE", tmp_path / "state.json") + record_delivery_for_user("user-uuid-abc", "12345") + record_delivery_for_user("user-uuid-abc", "67890") + result = get_delivered_pmids_for_user("user-uuid-abc") + assert result == {"12345", "67890"} + + def test_user_and_agent_state_are_independent(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.podcast.state.STATE_FILE", tmp_path / "state.json") + record_delivery("myagent", "11111") + record_delivery_for_user("user-uuid-abc", "22222") + assert get_delivered_pmids("myagent") == {"11111"} + assert get_delivered_pmids_for_user("user-uuid-abc") == {"22222"} + # no cross-contamination + assert "22222" not in get_delivered_pmids("myagent") + assert "11111" not in get_delivered_pmids_for_user("user-uuid-abc") + + def test_duplicate_pmid_not_added_twice(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.podcast.state.STATE_FILE", tmp_path / "state.json") + record_delivery_for_user("user-uuid-abc", "99999") + record_delivery_for_user("user-uuid-abc", "99999") + raw = json.loads((tmp_path / "state.json").read_text()) + assert raw["users"]["user-uuid-abc"]["delivered_pmids"].count("99999") == 1 + + def test_atomic_write_leaves_valid_json(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.podcast.state.STATE_FILE", tmp_path / 
"state.json") + record_delivery_for_user("u1", "aaa") + content = (tmp_path / "state.json").read_text() + parsed = json.loads(content) # must be valid JSON + assert "users" in parsed + + +# --------------------------------------------------------------------------- +# _build_profile_text_from_db +# --------------------------------------------------------------------------- + +class TestBuildProfileTextFromDb: + def _make_user(self, **kwargs): + defaults = dict(name="Dr. Alice", institution="MIT", department="Biology") + defaults.update(kwargs) + return SimpleNamespace(**defaults) + + def _make_profile(self, **kwargs): + defaults = dict( + research_summary="We study protein aggregation.", + disease_areas=["Alzheimer's", "Parkinson's"], + techniques=["cryo-EM", "mass spectrometry"], + experimental_models=["mouse", "iPSC"], + keywords=["proteostasis", "neurodegeneration"], + ) + defaults.update(kwargs) + return SimpleNamespace(**defaults) + + def test_includes_user_name(self): + text = _build_profile_text_from_db(self._make_user(), self._make_profile()) + assert "Dr. 
Alice" in text + + def test_includes_research_summary(self): + text = _build_profile_text_from_db(self._make_user(), self._make_profile()) + assert "protein aggregation" in text + + def test_includes_disease_areas(self): + text = _build_profile_text_from_db(self._make_user(), self._make_profile()) + assert "Alzheimer" in text + + def test_includes_techniques(self): + text = _build_profile_text_from_db(self._make_user(), self._make_profile()) + assert "cryo-EM" in text + + def test_handles_none_fields_gracefully(self): + profile = self._make_profile(disease_areas=None, techniques=None, keywords=None) + text = _build_profile_text_from_db(self._make_user(), profile) + assert "protein aggregation" in text # summary still present + + def test_handles_missing_institution(self): + user = self._make_user(institution=None, department=None) + text = _build_profile_text_from_db(user, self._make_profile()) + assert "Dr. Alice" in text