diff --git a/.dockerignore b/.dockerignore
index 038120b..a6f4128 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,3 +1,4 @@
+README.md
 bin/
 .git/
-*.tar.gz
\ No newline at end of file
+*.tar.gz
diff --git a/.github/workflows/e2e-tests.yaml b/.github/workflows/e2e-tests.yaml
new file mode 100644
index 0000000..f3e8c45
--- /dev/null
+++ b/.github/workflows/e2e-tests.yaml
@@ -0,0 +1,109 @@
+name: e2e
+on:
+  pull_request:
+    branches: [main]
+  push:
+    branches: [main]
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  KIND_VERSION: v0.32.0
+  IMAGE: ghcr.io/converged-computing/fluence:latest
+
+jobs:
+  e2e:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      #- name: Set up Docker Buildx
+      #  uses: docker/setup-buildx-action@v3
+
+     # - name: Build fluence image
+     #   uses: docker/build-push-action@v6
+     #   with:
+     #     context: .
+     #     file: ./Dockerfile
+     #     push: false
+     #     load: true
+     #     tags: ${{ env.IMAGE }}
+     #     cache-from: type=gha
+     #     cache-to: type=gha,mode=max
+      - name: Create k8s Kind Cluster
+        uses: helm/kind-action@v1.10.0
+        with:
+          version: v0.32.0              # Define your custom KinD CLI version here
+          node_image: kindest/node:v1.36.1
+          config: ./deploy/kind-config.yaml
+          
+      - name: Free Disk Space (Ubuntu)
+        run: |
+          echo "=== Disk space before cleanup ==="
+          df -h
+          
+          # Remove large software runtimes and tools
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          
+          # Clean package caches
+          sudo apt-get clean          
+          echo "=== Disk space after cleanup ==="
+          df -h
+
+      - name: Deploy fluence (base)
+        run: |
+          kubectl apply -f deploy/fluence-test.yaml
+          kubectl rollout status -n kube-system deployment/fluence --timeout=180s
+          POD=$(kubectl -n kube-system get pods -l app=fluence -o name | head -1)
+          kubectl -n kube-system exec "${POD#pod/}" -- ls /tmp/
+          kubectl -n kube-system logs "${POD#pod/}"
+          kubectl -n kube-system exec "${POD#pod/}" -- /bin/bash -c "cat /tmp/fluence-graph-*.json"
+          kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{": cpu="}{.status.allocatable.cpu}{" mem="}{.status.allocatable.memory}{"\n"}{end}'
+         
+      - name: E2E - classical gang
+        run: bash test/e2e/01-classical-gang.sh
+
+      - name: Deploy quantum add-on
+        run: |
+          # Includes the device plugin and oriented to testing container
+          kubectl apply -f deploy/fluence-resources-test.yaml
+          kubectl rollout restart -n kube-system deployment/fluence
+          kubectl rollout status  -n kube-system deployment/fluence --timeout=60s
+          for i in $(seq 1 60); do
+            kubectl get nodes -o jsonpath='{range .items[*]}{.status.allocatable}{"\n"}{end}'
+            kubectl get nodes -o jsonpath='{range .items[*]}{.status.allocatable}{"\n"}{end}' | grep -q 'fluxion.flux-framework.org/qpu' && break
+            sleep 1
+          done
+          POD=$(kubectl -n kube-system get pods -l app=fluence -o name | head -1)
+          kubectl -n kube-system exec "${POD#pod/}" -- /bin/bash -c "cat /tmp/fluence-graph-*.json"
+
+      - name: Wait for webhook
+        run: |
+
+          # wait for the deployment AND for the caBundle to be populated on the webhook config
+          kubectl -n kube-system rollout status deployment/fluence-webhook --timeout=120s
+          for i in $(seq 1 30); do
+            cab=$(kubectl get mutatingwebhookconfiguration fluence-webhook \
+                  -o jsonpath='{.webhooks[0].clientConfig.caBundle}' 2>/dev/null)
+            [ -n "$cab" ] && break
+            sleep 2
+          done
+          # let TLS serving settle after caBundle patch
+          sleep 3 
+
+      - name: E2E - quantum placement
+        run: bash test/e2e/02-quantum-placement.sh
+
+      #- name: E2E - restart recovery (no double-book)
+      #  run: bash test/e2e/03-restart-recovery.sh
+
+      - name: Dump diagnostics on failure
+        if: failure()
+        run: |
+          kubectl get pods -A -o wide
+          kubectl logs -n kube-system deployment/fluence
diff --git a/Dockerfile b/Dockerfile
index ceed5da..2fe9596 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,33 +1,5 @@
-# Mr. Fluence!
-# Multi-stage build for the fluence scheduler.
-# The scheduler binary cgo-links flux-sched (Fluxion) for resource matching.
+FROM ghcr.io/converged-computing/fluence-base:latest AS builder
 
-FROM fluxrm/flux-core:noble AS builder
-
-USER root
-ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/lib
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get update && apt-get install -y --no-install-recommends \
-        libboost-graph-dev libboost-system-dev libboost-filesystem-dev \
-        libboost-regex-dev libyaml-cpp-dev libedit-dev libczmq-dev \
-        python3-yaml ninja-build cmake curl git wget ca-certificates \
- && rm -rf /var/lib/apt/lists/*
-
-# Go toolchain
-RUN wget -q https://go.dev/dl/go1.26.0.linux-amd64.tar.gz \
- && tar -C /usr/local -xzf go1.26.0.linux-amd64.tar.gz && rm go1.26.0.linux-amd64.tar.gz
-ENV PATH=$PATH:/usr/local/go/bin
-
-# flux-sched (Fluxion) with the Go reapi bindings -> /usr; build tree at /opt/flux-sched
-#RUN git clone https://github.com/flux-framework/flux-sched /opt/flux-sched \
-RUN git clone -b implement-reapi-cli-update-allocate https://github.com/vsoch/flux-sched /opt/flux-sched \
- && export FLUX_SCHED_VERSION=0.53.0 \
- && cd /opt/flux-sched && export WITH_GO=yes && ./configure --prefix=/usr \
- && mkdir build && cd build && cmake ../ && cd ../ && make -j"$(nproc)" && make install
-ENV FLUX_SCHED_ROOT=/opt/flux-sched
-
-# Build the scheduler
 WORKDIR /src
 COPY go.mod go.sum* ./
 RUN go mod download || true
diff --git a/Makefile b/Makefile
index 5ea76a7..46c3344 100644
--- a/Makefile
+++ b/Makefile
@@ -43,6 +43,20 @@ test-restore:
 image: ## Build the scheduler container image
 	docker build -t $(IMG) .
 
+.PHONY: test-image
+test-image: ## Build the scheduler container image
+	docker build -t $(IMG)-test .
+	docker push $(IMG)-test
+
+.PHONY: test-image-deploy
+test-image-deploy: test-image
+	kubectl patch podgroup training -n default --type=merge -p '{"metadata":{"finalizers":null}}' || true
+	kubectl delete deployments --all
+	kubectl delete pods --all
+	kubectl delete -f deploy/fluence-test.yaml
+	kubectl delete pods --all
+
+
 .PHONY: deploy
 deploy: ## Install RBAC + scheduler into kube-system
 	kubectl apply -f deploy/fluence.yaml
@@ -50,4 +64,4 @@ deploy: ## Install RBAC + scheduler into kube-system
 .PHONY: help
 help:
 	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
-	  awk 'BEGIN{FS=":.*?## "}{printf "  %-14s %s\n", $$1, $$2}'
\ No newline at end of file
+	  awk 'BEGIN{FS=":.*?## "}{printf "  %-14s %s\n", $$1, $$2}'
diff --git a/README.md b/README.md
index b1b3d33..50849dc 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,8 @@
 
 ![img/fluence.png](img/fluence.png)
 
+🚧 **UNDER DEVELOPMENT** 🚧 not ready for production use! I rolled back features since the recorded demo, and am going to add them back with proper testing. I have not finished this yet, but anticipate later in the week of 6/16/2026. Thank you for your patience! -@vsoch
+
 A Kubernetes scheduler plugin that places **pod groups** (and individual pods)
 by matching them against a [Fluxion](https://github.com/flux-framework/flux-sched)
 (flux-sched) resource graph built from the live cluster.
diff --git a/deploy/fluence-resources-test.yaml b/deploy/fluence-resources-test.yaml
new file mode 100644
index 0000000..d4fed97
--- /dev/null
+++ b/deploy/fluence-resources-test.yaml
@@ -0,0 +1,76 @@
+# Resources add-on for fluence. Turns on fluence-managed resources by supplying
+# (1) the resources config and (2) the device plugin that advertises them.
+# Quantum backends are just the example payload here; any resource type fluence
+# can model goes in the same ConfigMap. Apply AFTER deploy/fluence.yaml:
+#
+#   kubectl apply -f deploy/fluence.yaml            # base scheduler (no devices)
+#   kubectl apply -f deploy/fluence-resources.yaml  # + resources config + device plugin
+#   kubectl rollout restart deployment/fluence -n kube-system   # scheduler re-reads resources
+#
+# The base scheduler already mounts the `fluence-resources` ConfigMap optionally
+# and reads FLUENCE_RESOURCES, so this add-on is purely additive — no edits to
+# the base Deployment.
+
+# Resources config: the SINGLE source of truth for the resource types fluence
+# injects/advertises. The scheduler builds qpu/qubit graph vertices from it; the
+# device plugin derives which extended resources to advertise from the SAME
+# document (same rule), so the two cannot drift.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: fluence-resources
+  namespace: kube-system
+data:
+  resources.yaml: |
+    backends:
+      - name: ibm_fez
+        num_qubits: 156
+        vendor: ibm
+        qrmi_type: qiskit-runtime-service
+      - name: ibm_marrakesh
+        num_qubits: 156
+        vendor: ibm
+        qrmi_type: qiskit-runtime-service
+---
+# Device plugin: advertises the exotic Fluxion resource types (derived from the
+# resources config above) on every node, so pods can request them via resources
+# and NodeResourcesFit is satisfied.
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: fluence-deviceplugin
+  namespace: kube-system
+  labels: {app: fluence-deviceplugin}
+spec:
+  selector:
+    matchLabels: {app: fluence-deviceplugin}
+  template:
+    metadata:
+      labels: {app: fluence-deviceplugin}
+    spec:
+      priorityClassName: system-node-critical
+      tolerations:
+        - operator: Exists   # run on every node, including tainted/control-plane
+      containers:
+        - name: deviceplugin
+          image: ghcr.io/converged-computing/fluence:test
+          command: ["/bin/fluence-deviceplugin"]
+          env:
+            - name: FLUENCE_RESOURCES
+              value: /etc/fluence/resources.yaml
+            - name: FLUENCE_RESOURCE_CAPACITY
+              value: "1000"
+          securityContext:
+            privileged: true
+          volumeMounts:
+            - name: device-plugin
+              mountPath: /var/lib/kubelet/device-plugins
+            - name: resources
+              mountPath: /etc/fluence
+      volumes:
+        - name: device-plugin
+          hostPath:
+            path: /var/lib/kubelet/device-plugins
+        - name: resources
+          configMap:
+            name: fluence-resources
diff --git a/deploy/fluence-test.yaml b/deploy/fluence-test.yaml
new file mode 100644
index 0000000..525713e
--- /dev/null
+++ b/deploy/fluence-test.yaml
@@ -0,0 +1,224 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: fluence
+  namespace: kube-system
+---
+# Bind the built-in scheduler roles so fluence (a full kube-scheduler build) has
+# every list/watch the scheduling framework needs (nodes, pods, PV/PVC, CSI,
+# storageclasses, resourceclaims/slices, volumeattachments, events, etc.).
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: fluence-as-kube-scheduler
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: system:kube-scheduler
+subjects:
+  - kind: ServiceAccount
+    name: fluence
+    namespace: kube-system
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: fluence-as-volume-scheduler
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: system:volume-scheduler
+subjects:
+  - kind: ServiceAccount
+    name: fluence
+    namespace: kube-system
+---
+# Delegated authentication: read the auth configmap in kube-system. This is the
+# fix for the "extension-apiserver-authentication ... forbidden" errors.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: fluence-extension-apiserver-authentication-reader
+  namespace: kube-system
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: extension-apiserver-authentication-reader
+subjects:
+  - kind: ServiceAccount
+    name: fluence
+    namespace: kube-system
+---
+# Extras the built-in scheduler role does not grant: the alpha PodGroup/Workload
+# API (gang), and leader-election leases under our scheduler name.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: fluence-extra
+rules:
+  - apiGroups: ["scheduling.k8s.io"]
+    resources: ["podgroups", "workloads", "podgroups/status", "workloads/status"]
+    verbs: ["get", "list", "watch", "update", "patch"]
+  - apiGroups: ["coordination.k8s.io"]
+    resources: ["leases"]
+    verbs: ["create", "get", "update", "list", "watch"]
+  # PreBind stamps the allocated backend onto the pod as an annotation; the
+  # built-in system:kube-scheduler role only allows patching pods/status, not
+  # the pod object, so grant it here.
+  - apiGroups: [""]
+    resources: ["pods"]
+    verbs: ["get", "list", "watch", "patch", "update"]
+  # The webhook self-manages its TLS by patching its own config's caBundle.
+  - apiGroups: ["admissionregistration.k8s.io"]
+    resources: ["mutatingwebhookconfigurations"]
+    verbs: ["get", "list", "watch", "patch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: fluence-extra
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: fluence-extra
+subjects:
+  - kind: ServiceAccount
+    name: fluence
+    namespace: kube-system
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: fluence-scheduler-config
+  namespace: kube-system
+data:
+  scheduler-config.yaml: |
+    apiVersion: kubescheduler.config.k8s.io/v1
+    kind: KubeSchedulerConfiguration
+    leaderElection:
+      leaderElect: false
+    profiles:
+      - schedulerName: fluence
+        plugins:
+          # multiPoint wires Fluence into every extension point its Go type
+          # implements: PreFilter, Filter, and PreBind (which stamps the backend
+          # annotation). Listing points individually risks omitting one — that is
+          # exactly what left PreBind unwired and the backend annotation unset.
+          multiPoint:
+            enabled: [{name: Fluence}]
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: fluence
+  namespace: kube-system
+  labels: {app: fluence}
+spec:
+  replicas: 1
+  selector:
+    matchLabels: {app: fluence}
+  template:
+    metadata:
+      labels: {app: fluence}
+    spec:
+      serviceAccountName: fluence
+      containers:
+        - name: fluence
+          image: ghcr.io/converged-computing/fluence:test
+          # Allows for kind load
+          # imagePullPolicy: Never
+          command:
+            - /bin/fluence
+            - --config=/etc/fluence/scheduler-config.yaml
+            # fluence is its own scheduler binary, so it needs the gang gates set
+            # here (the cluster-level kube-scheduler gates don't apply to it).
+            # Without these its PodGroup/GangScheduling plugin is inactive, pods
+            # schedule with no gang semantics, and PodGroup status stays Pending.
+            - --feature-gates=GenericWorkload=true,GangScheduling=true
+            - --v=4
+          env:
+            # Path to the resources config (e.g. quantum backends). Unset/empty
+            # file -> classical-only graph. Supplied by the quantum add-on.
+            - name: FLUENCE_RESOURCES
+              value: /etc/fluence/resources.yaml
+          volumeMounts:
+            - name: config
+              mountPath: /etc/fluence
+      volumes:
+        - name: config
+          projected:
+            sources:
+              - configMap: {name: fluence-scheduler-config}
+              - configMap: {name: fluence-resources, optional: true}
+---
+# Mutating webhook: injects scheduler-chosen values into pods at creation time
+# (currently a downward-API QRMI_BACKEND env for quantum pods). It self-manages
+# TLS — generates a CA + serving cert at startup and patches the caBundle below —
+# so no cert-manager and no committed keys. failurePolicy Ignore keeps a webhook
+# outage from blocking pod creation cluster-wide.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: fluence-webhook
+  namespace: kube-system
+  labels: {app: fluence-webhook}
+spec:
+  replicas: 1
+  selector:
+    matchLabels: {app: fluence-webhook}
+  template:
+    metadata:
+      labels: {app: fluence-webhook}
+    spec:
+      serviceAccountName: fluence
+      containers:
+        - name: webhook
+          image: ghcr.io/converged-computing/fluence:test
+          # Allows for kind load
+          # imagePullPolicy: Never
+          command: ["/bin/fluence-webhook"]
+          ports:
+            - containerPort: 8443
+          readinessProbe:
+            httpGet: {path: /healthz, port: 8443, scheme: HTTPS}
+            initialDelaySeconds: 2
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: fluence-webhook
+  namespace: kube-system
+spec:
+  selector: {app: fluence-webhook}
+  ports:
+    - port: 443
+      targetPort: 8443
+---
+apiVersion: admissionregistration.k8s.io/v1
+kind: MutatingWebhookConfiguration
+metadata:
+  name: fluence-webhook
+webhooks:
+  - name: pods.fluence.flux-framework.org
+    admissionReviewVersions: ["v1"]
+    sideEffects: None
+    failurePolicy: Ignore        # never block pod creation if the webhook is down
+    # caBundle is filled in at runtime by the webhook patching this object.
+    clientConfig:
+      service:
+        name: fluence-webhook
+        namespace: kube-system
+        path: /mutate
+        port: 443
+    rules:
+      - apiGroups: [""]
+        apiVersions: ["v1"]
+        operations: ["CREATE"]
+        resources: ["pods"]
+        scope: Namespaced
+    # Don't intercept system pods (and avoid bootstrap coupling).
+    namespaceSelector:
+      matchExpressions:
+        - key: kubernetes.io/metadata.name
+          operator: NotIn
+          values: ["kube-system"]
diff --git a/deploy/fluence.yaml b/deploy/fluence.yaml
index 8090a34..0b66246 100644
--- a/deploy/fluence.yaml
+++ b/deploy/fluence.yaml
@@ -125,8 +125,6 @@ spec:
       containers:
         - name: fluence
           image: ghcr.io/converged-computing/fluence:latest
-          # Uncomment for development or if loaded into kind
-          # imagePullPolicy: Never
           command:
             - /bin/fluence
             - --config=/etc/fluence/scheduler-config.yaml
@@ -174,7 +172,6 @@ spec:
       containers:
         - name: webhook
           image: ghcr.io/converged-computing/fluence:latest
-          imagePullPolicy: Never
           command: ["/bin/fluence-webhook"]
           ports:
             - containerPort: 8443
diff --git a/deploy/kind-config.yaml b/deploy/kind-config.yaml
index ec310bc..c94e070 100644
--- a/deploy/kind-config.yaml
+++ b/deploy/kind-config.yaml
@@ -32,4 +32,4 @@ nodes:
             - name: feature-gates
               value: "GenericWorkload=true"
   - role: worker
-  - role: worker
\ No newline at end of file
+  - role: worker
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..4ec6a49
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,25 @@
+FROM fluxrm/flux-core:noble AS builder
+
+# base image for fluence to speed up build time
+# docker build -t ghcr.io/converged-computing/fluence-base:latest .
+
+USER root
+ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/lib
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        libboost-graph-dev libboost-system-dev libboost-filesystem-dev \
+        libboost-regex-dev libyaml-cpp-dev libedit-dev libczmq-dev \
+        python3-yaml ninja-build cmake curl git wget ca-certificates \
+ && rm -rf /var/lib/apt/lists/*
+
+# Go toolchain
+RUN wget -q https://go.dev/dl/go1.26.0.linux-amd64.tar.gz \
+ && tar -C /usr/local -xzf go1.26.0.linux-amd64.tar.gz && rm go1.26.0.linux-amd64.tar.gz
+ENV PATH=$PATH:/usr/local/go/bin
+
+RUN wget https://github.com/flux-framework/flux-sched/releases/download/v0.52.0/flux-sched-0.52.0.tar.gz && \ 
+    tar -xzvf flux-sched-0.52.0.tar.gz && mv flux-sched-0.52.0 /opt/flux-sched \
+ && cd /opt/flux-sched && export WITH_GO=yes && ./configure --prefix=/usr \
+ && mkdir build && cd build && cmake ../ && cd ../ && make -j"$(nproc)" && make install
+ENV FLUX_SCHED_ROOT=/opt/flux-sched
diff --git a/examples/podgroup.yaml b/examples/podgroup.yaml
index 068e56c..7184a8f 100644
--- a/examples/podgroup.yaml
+++ b/examples/podgroup.yaml
@@ -33,4 +33,4 @@ spec:
           resources:
             requests:
               cpu: "4"
-              memory: 8Gi
\ No newline at end of file
+              memory: 8Gi
diff --git a/examples/single-podgroup.yaml b/examples/single-podgroup.yaml
new file mode 100644
index 0000000..e44780b
--- /dev/null
+++ b/examples/single-podgroup.yaml
@@ -0,0 +1,35 @@
+# Native gang scheduling (k8s >= 1.36, GangScheduling/GenericWorkload gates on).
+# Fluence does placement; the PodGroup gives all-or-nothing semantics. Pods link
+# to the PodGroup via the first-class field spec.schedulingGroup.podGroupName.
+apiVersion: scheduling.k8s.io/v1alpha2
+kind: PodGroup
+metadata:
+  name: training
+spec:
+  schedulingPolicy:
+    gang:
+      minCount: 1
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: training
+spec:
+  replicas: 1
+  selector:
+    matchLabels: {app: training}
+  template:
+    metadata:
+      labels:
+        app: training
+    spec:
+      schedulerName: fluence
+      schedulingGroup:
+        podGroupName: training
+      containers:
+        - name: worker
+          image: busybox
+          command: ["sleep", "3600"]
+          resources:
+            requests:
+              cpu: "1"
diff --git a/examples/test/e2e/quantum-pod-mock-2.yaml b/examples/test/e2e/quantum-pod-mock-2.yaml
new file mode 100644
index 0000000..5286fe0
--- /dev/null
+++ b/examples/test/e2e/quantum-pod-mock-2.yaml
@@ -0,0 +1,19 @@
+# A second qpu pod, identical request to sampler-mock. Used by the restart-recovery
+# test: after fluence restarts and replays the first allocation, this one must NOT be
+# given the same exclusive backend (it should stay Pending if the backend is taken).
+apiVersion: v1
+kind: Pod
+metadata:
+  name: sampler-mock-2
+spec:
+  schedulerName: fluence
+  restartPolicy: Never
+  containers:
+    - name: sampler
+      image: busybox
+      command: ["sh", "-c", "echo BACKEND=$QRMI_BACKEND; sleep 3600"]
+      resources:
+        requests:
+          fluxion.flux-framework.org/qpu: "1"
+        limits:
+          fluxion.flux-framework.org/qpu: "1"
diff --git a/examples/test/e2e/quantum-pod-mock.yaml b/examples/test/e2e/quantum-pod-mock.yaml
new file mode 100644
index 0000000..92849dd
--- /dev/null
+++ b/examples/test/e2e/quantum-pod-mock.yaml
@@ -0,0 +1,21 @@
+# CI mock of examples/quantum-pod.yaml: requests the same fluxion.flux-framework.org/qpu
+# resource so fluence matches a backend and the webhook injects QRMI_BACKEND, but runs a
+# trivial container instead of qrmi-sampler -- no IBM secrets, no real submission. Proves
+# placement + the backend handoff end to end.
+apiVersion: v1
+kind: Pod
+metadata:
+  name: sampler-mock
+spec:
+  schedulerName: fluence
+  restartPolicy: Never
+  containers:
+    - name: sampler
+      image: busybox
+      # Print the injected backend, then idle so we can assert on it.
+      command: ["sh", "-c", "echo BACKEND=$QRMI_BACKEND; sleep 3600"]
+      resources:
+        requests:
+          fluxion.flux-framework.org/qpu: "1"
+        limits:
+          fluxion.flux-framework.org/qpu: "1"
diff --git a/pkg/fluence/fluence.go b/pkg/fluence/fluence.go
index 58d069d..13cdcb3 100644
--- a/pkg/fluence/fluence.go
+++ b/pkg/fluence/fluence.go
@@ -106,6 +106,7 @@ func New(ctx context.Context, _ runtime.Object, h fwk.Handle) (fwk.Plugin, error
 	if err != nil {
 		return nil, fmt.Errorf("build resource graph: %w", err)
 	}
+	fmt.Printf("Fluence resource graph:\n%s", jgfBytes)
 
 	// FluxionGraph.Init reads from a file path, so stage the generated graph.
 	tmp, err := os.CreateTemp("", "fluence-graph-*.json")
@@ -168,17 +169,21 @@ func (f *Fluence) PreFilter(
 		return nil, fwk.AsStatus(err)
 	}
 
+	fmt.Printf("Attempting to match:\n%s\n", specYAML)
 	f.matcherMu.Lock()
 	req, err := f.matcher.MatchAllocateSpec(specYAML)
 	f.matcherMu.Unlock()
 	if err != nil {
+		fmt.Printf("FAIL Match failed: %s\n", err)
 		return nil, fwk.NewStatus(fwk.Unschedulable, fmt.Sprintf("fluxion match failed: %v", err))
 	}
 	place, err := placement.PlacementFromAllocation(req.Allocation)
 	if err != nil {
+		fmt.Printf("FAIL Placement failed: %s\n", err)
 		return nil, fwk.AsStatus(err)
 	}
 	if len(place.Nodes) == 0 && place.Backend == "" {
+		fmt.Println("FAIL No nodes")
 		return nil, fwk.NewStatus(fwk.Unschedulable, "fluxion returned no allocation")
 	}
 	// A quantum-only allocation has a Backend but no Nodes (a qpu vertex lives
diff --git a/pkg/jgf/jgf.go b/pkg/jgf/jgf.go
index 140a39f..ca9bd98 100644
--- a/pkg/jgf/jgf.go
+++ b/pkg/jgf/jgf.go
@@ -10,6 +10,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"sort"
+	"strings"
 )
 
 const containment = "containment"
@@ -188,6 +189,10 @@ type Doc struct {
 func (b *Builder) Doc() Doc {
 	d := Doc{}
 	for _, v := range b.vertices {
+		// Skip the control plane
+		if strings.Contains(v.name, "control-plane") {
+			continue
+		}
 		d.Graph.Nodes = append(d.Graph.Nodes, node{ID: v.key, Metadata: metadata{v: v}})
 	}
 	for _, e := range b.edges {
diff --git a/pkg/placement/placement.go b/pkg/placement/placement.go
index 46c4222..0213dc4 100644
--- a/pkg/placement/placement.go
+++ b/pkg/placement/placement.go
@@ -65,27 +65,17 @@ func podResources(p *corev1.Pod) map[string]int {
 			}
 		}
 	}
-	// A pod that requested no exotic (non-classical) resource still needs at
-	// least one core to land on.
-	if !hasExotic(counts) && counts["core"] == 0 {
+
+	// Every pod runs on a node, so always request at least one core, even when the
+	// pod also asks for an exotic resource (qpu/qubit). Without this a qpu-only pod
+	// produces a slot with no compute and Fluxion allocates a bare backend with no
+	// node to land the pod on.
+	if counts["core"] == 0 {
 		counts["core"] = 1
 	}
 	return counts
 }
 
-// hasExotic reports whether counts contains any non-classical type (i.e. one
-// that came through the Fluxion prefix, like qpu/qubit).
-func hasExotic(counts map[string]int) bool {
-	for t := range counts {
-		switch t {
-		case "core", "memory", "gpu":
-		default:
-			return true
-		}
-	}
-	return false
-}
-
 // JobspecForGroup builds a Fluxion jobspec for a whole pod group: a slot per pod
 // (count = group size), each holding the per-pod resources as `with` entries —
 // one per requested Fluxion type. A hybrid pod (e.g. cores + a qpu) produces a
diff --git a/test/e2e/01-classical-gang.sh b/test/e2e/01-classical-gang.sh
new file mode 100644
index 0000000..7c3e4b2
--- /dev/null
+++ b/test/e2e/01-classical-gang.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+# Classical gang scheduling: a PodGroup of 2 must be placed all-or-nothing on real nodes.
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE}/lib.sh"
+
+log "TEST 1: classical gang scheduling"
+kubectl apply -f examples/single-podgroup.yaml
+
+# All pods in the 'training' deployment must reach Running (scheduled + started).
+log "waiting for both training pods to schedule"
+kubectl wait --for=condition=Ready pod -l app=training || fail "training gang did not all become Ready (gang scheduling failed)"
+
+# Each pod must have a real node assigned by fluence.
+for p in $(kubectl get pods -l app=training -o name); do
+  pod="${p#pod/}"
+  assert_scheduled "$pod" || fail "$pod has no nodeName"
+  sched="$(kubectl get pod "$pod" -o jsonpath='{.spec.schedulerName}')"
+  [ "$sched" = "fluence" ] || fail "$pod was not scheduled by fluence (got: $sched)"
+done
+
+count="$(kubectl get pods -l app=training --no-headers | wc -l | tr -d ' ')"
+[ "$count" = "1" ] || fail "expected 2 training pods, got $count"
+
+log "PASS: classical gang placed all $count pods via fluence"
+kubectl delete -f examples/single-podgroup.yaml --wait=false || true
+kubectl patch podgroup training --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
diff --git a/test/e2e/02-quantum-placement.sh b/test/e2e/02-quantum-placement.sh
new file mode 100644
index 0000000..17897a3
--- /dev/null
+++ b/test/e2e/02-quantum-placement.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+# Quantum placement: a qpu pod is matched to a backend and the webhook injects QRMI_BACKEND.
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE}/lib.sh"
+ANN="fluence.flux-framework.org/backend"
+
+log "TEST 2: quantum placement and backend handoff"
+kubectl apply -f examples/test/e2e/quantum-pod-mock.yaml
+
+wait_pod_phase sampler-mock Running 120 || fail "sampler-mock did not reach Running"
+
+# fluence must have stamped the chosen backend annotation.
+backend="$(kubectl get pod sampler-mock -o jsonpath="{.metadata.annotations.${ANN//./\\.}}" 2>/dev/null || true)"
+[ -n "$backend" ] || (show_webhook sampler-mock && fail "backend annotation ($ANN) was not set by fluence")
+log "fluence chose backend: $backend"
+
+# The webhook must have surfaced it as QRMI_BACKEND inside the container.
+out="$(kubectl logs sampler-mock || true)"
+echo "$out" | grep -q "BACKEND=${backend}" \
+  || (show_webhook sampler-mock && fail "QRMI_BACKEND in container ('$out') does not match annotation ($backend)")
+
+log "PASS: qpu pod scheduled, backend '$backend' chosen and injected as QRMI_BACKEND"
+kubectl delete -f examples/test/e2e/quantum-pod-mock.yaml --wait=false || true
diff --git a/test/e2e/03-restart-recovery.sh b/test/e2e/03-restart-recovery.sh
new file mode 100644
index 0000000..20c1be9
--- /dev/null
+++ b/test/e2e/03-restart-recovery.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+# Restart recovery: after the scheduler restarts, it must replay the existing allocation
+# (via reapi update_allocate) and NOT double-book an exclusive qpu backend.
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE}/lib.sh"
+NS=default
+ANN="fluence.flux-framework.org/backend"
+
+log "TEST 3: restart does not double-book an exclusive backend"
+
+# 1. Schedule the first qpu pod and capture its backend.
+kubectl apply -f examples/test/e2e/quantum-pod-mock.yaml
+wait_pod_phase sampler-mock "$NS" Running 120 || fail "sampler-mock did not reach Running"
+backend="$(kubectl get pod sampler-mock -n "$NS" -o jsonpath="{.metadata.annotations.${ANN//./\\.}}" 2>/dev/null || true)"
+[ -n "$backend" ] || fail "first pod has no backend annotation"
+log "first pod holds backend: $backend"
+
+# 2. Restart the scheduler. Its in-memory Fluxion graph is rebuilt empty; recovery must
+#    replay the persisted allocation so the backend stays occupied.
+log "restarting fluence scheduler"
+kubectl rollout restart -n "${NS_KUBE}" deployment/fluence
+wait_fluence_ready
+
+# 3. The original pod must still be Running and still hold the same backend.
+wait_pod_phase sampler-mock "$NS" Running 30 || fail "first pod not Running after restart"
+
+# 4. A second pod requesting the same exclusive qpu must NOT get the same backend.
+#    If recovery worked, the backend is occupied and the second pod stays Pending.
+kubectl apply -f examples/test/e2e/quantum-pod-mock-2.yaml
+if assert_stays_pending sampler-mock-2 "$NS" 45; then
+  log "PASS: second qpu pod stayed Pending; backend '$backend' was not double-booked"
+else
+  backend2="$(kubectl get pod sampler-mock-2 -n "$NS" -o jsonpath="{.metadata.annotations.${ANN//./\\.}}" 2>/dev/null || true)"
+  if [ "$backend2" = "$backend" ]; then
+    fail "DOUBLE-BOOK: second pod got the same exclusive backend '$backend' after restart (recovery did not replay)"
+  else
+    log "NOTE: second pod scheduled to a DIFFERENT backend '$backend2' (ok only if >1 backend configured)"
+  fi
+fi
+
+kubectl delete -f examples/test/e2e/quantum-pod-mock-2.yaml --wait=false || true
+kubectl delete -f examples/test/e2e/quantum-pod-mock.yaml --wait=false || true
diff --git a/test/e2e/lib.sh b/test/e2e/lib.sh
new file mode 100644
index 0000000..341634a
--- /dev/null
+++ b/test/e2e/lib.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+# Shared helpers for fluence e2e tests. Sourced by each scenario script.
+set -euo pipefail
+
+NS_KUBE="${NS_KUBE:-kube-system}"
+TIMEOUT="${TIMEOUT:-180s}"
+
+log()  { echo "=== $*"; }
+fail() { echo "FAIL: $*" >&2; dump; exit 1; }
+
+# Dump cluster state to help debug a CI failure.
+dump() {
+  echo "----- pods (all namespaces) -----"
+  kubectl get pods -A -o wide || true
+  echo "----- events -----"
+  kubectl get events -A --sort-by=.lastTimestamp | tail -40 || true
+  echo "----- fluence scheduler logs -----"
+  kubectl logs -n "${NS_KUBE}" deployment/fluence --tail=120 || true
+}
+
+# Wait until the fluence scheduler deployment is Available.
+wait_fluence_ready() {
+  log "waiting for fluence scheduler to be ready"
+  kubectl rollout status -n "${NS_KUBE}" deployment/fluence --timeout="${TIMEOUT}" \
+    || fail "fluence deployment did not become ready"
+}
+
+show_webhook() {
+  pod=$1
+  echo "FAIL: QRMI_BACKEND mismatch"
+  kubectl get pod $pod -o jsonpath='{.spec.containers[0].env}'; echo
+  kubectl get pod $pod -o jsonpath='{.metadata.annotations}'; echo
+  kubectl -n kube-system logs deploy/fluence-webhook --tail=50
+}
+
+# wait_pod_phase <pod> <namespace> <phase> [timeout]
+wait_pod_phase() {
+  local pod="$1" want="$2" t="${3:-120}"
+  echo "pod: $pod"
+  echo "want: $want"
+  echo "t: $t"
+  local i=0
+  while [ "$i" -lt "$t" ]; do
+    local got
+    got="$(kubectl get pod "$pod" -o jsonpath='{.status.phase}' 2>/dev/null || true)"
+    [ "$got" = "$want" ] && return 0
+    sleep 1; i=$((i+1))
+  done
+  return 1
+}
+
+# assert_scheduled <pod> <namespace>: pod has a node assigned.
+assert_scheduled() {
+  local pod="$1" node
+  node="$(kubectl get pod "$pod" -o jsonpath='{.spec.nodeName}' 2>/dev/null || true)"
+  [ -n "$node" ] || return 1
+  echo "  $pod scheduled on $node"
+}
+
+# Assert a pod stays Pending for the whole window (used for the "must not double-book" check).
+assert_stays_pending() {
+  local pod="$1" t="${2:-30}" i=0
+  while [ "$i" -lt "$t" ]; do
+    local node
+    node="$(kubectl get pod "$pod" -o jsonpath='{.spec.nodeName}' 2>/dev/null || true)"
+    [ -n "$node" ] && return 1   # got scheduled -> failure for this assertion
+    sleep 1; i=$((i+1))
+  done
+  return 0
+}