diff --git a/src/odr/internal/html/pdf_file.cpp b/src/odr/internal/html/pdf_file.cpp
index 07a250d1..2fd8cad0 100644
--- a/src/odr/internal/html/pdf_file.cpp
+++ b/src/odr/internal/html/pdf_file.cpp
@@ -561,20 +561,35 @@ class HtmlServiceImpl final : public HtmlService {
     throw FileNotFound("Unknown path: " + path);
   }
 
-  // One emitted span. The styling is fully resolved into class tokens during
-  // the first pass; only the (already escaped) text and class list survive to
-  // the writing pass. A text run with an embedded font emits the dual layer as
-  // a transparent selectable span carrying the real Unicode with the visible
-  // glyph layer (PUA code points in the `@font-face` font) nested inside it:
-  // the child is absolutely positioned at the run origin and inherits the
-  // font size, spacing, and transform from the parent, so the placement
-  // classes live only on the parent. `glyph_classes` is empty when there is no
-  // nested layer (the legacy fallback path and display-only runs).
-  struct SpanOut {
-    std::string classes;
+  // One run within a line block. The line block owns the placement (position,
+  // font size, spacing); a run only carries its horizontal offset from the
+  // previous run (`margin`, computed at generation time) and its payload.
+  // `text` is the selectable/findable real Unicode (empty for a `no_unicode`
+  // run). `glyph_data`, when non-empty, is the PUA glyph string painted via
+  // `content:attr(data-g)` generated content — used when the run's real
+  // Unicode cannot render the correct glyphs directly (a lost cmap scalar, a
+  // /ToUnicode ligature expansion, an /ActualText substitution, or an inferred
+  // space). The visible glyph is then *out* of the DOM text stream, so it never
+  // breaks find/double-click across a word, and `text` rides alongside as a
+  // zero-width selectable overlay. When `glyph_data` is empty the run is a
+  // clean collapsed (or invisible / fallback) span and `text` renders directly.
+  // `color` is the optional per-run colour class suffix (overrides the block's
+  // default paint).
+  struct RunOut {
+    std::string margin; // "" or a margin-left class
+    std::string color;  // "" or a colour class suffix
     std::string text;
-    std::string glyph_classes;
-    std::string glyph_text;
+    std::string glyph_data;
+  };
+  // A line block: one absolutely-positioned container per PDF line (or per
+  // matrix run), owning the shared placement classes; its runs flow inline,
+  // each nudged by a gen-time `margin-left`, so the text is one contiguous,
+  // natively selectable/searchable string. `font_class` is the per-font
+  // family+colour class carried on the block (empty for the fallback path).
+  struct LineOut {
+    std::string classes;
+    std::string font_class;
+    std::vector<RunOut> runs;
   };
   // One vector item, already serialized to an SVG fragment in the page's
   // viewBox (PDF points, y-down): a painted `<path>` or an `<image>`.
@@ -582,9 +597,10 @@ class HtmlServiceImpl final : public HtmlService {
   struct PathOut {
     std::string svg;
   };
-  // Page content in paint (z) order: text spans and paths interleave, so a
-  // later fill occludes earlier text and vice versa.
-  using PageItem = std::variant<SpanOut, PathOut>;
+  // Page content in paint (z) order: line blocks and paths interleave, so a
+  // later fill occludes earlier text and vice versa. A line block is flushed
+  // (no further runs appended) before any path so paint order is preserved.
+  using PageItem = std::variant<LineOut, PathOut>;
   struct PageOut {
     std::string classes;
     double width{0};  // page box width, PDF points (for the SVG viewBox)
@@ -630,13 +646,15 @@ class HtmlServiceImpl final : public HtmlService {
     // emits `font_faces`.
     std::uint32_t family_count = 0;
     std::string font_faces;
-    std::string glyph_styles; // combined per-font `.fvN`/`.fnN`/`.gvN`/`.giN`
+    std::string glyph_styles; // per-font `.fvN` (visible) / `.fnN` (invisible)
     std::vector<pdf::Font *> accepted_fonts;
     std::vector<std::map<char32_t, std::uint16_t>> used_unicode;
-    // Which combined per-font classes occur, so only those are emitted in
-    // <head>. Slots: [0]=`.fvN` (plain visible), [1]=`.fnN` (plain invisible),
-    // [2]=`.gvN` (nested-glyph visible), [3]=`.giN` (nested-glyph invisible).
-    std::vector<std::array<bool, 4>> font_class_used;
+    // Which per-font classes occur, so only those are emitted in <head>.
+    // Slots: [0]=`.fvN` (visible: family + black), [1]=`.fnN` (invisible:
+    // family + transparent). Both are placement-free — the line block owns
+    // placement — and serve the run text, the generated-content glyph, and the
+    // overlay alike.
+    std::vector<std::array<bool, 2>> font_class_used;
     std::unordered_map<const pdf::Font *, std::uint32_t> family_index;
     const auto font_family = [&](pdf::Font *font) -> std::uint32_t {
       const auto [it, inserted] = family_index.try_emplace(font, 0);
@@ -683,23 +701,18 @@ class HtmlServiceImpl final : public HtmlService {
       it->second = index;
       accepted_fonts.push_back(font);
       used_unicode.emplace_back();
-      font_class_used.push_back({false, false, false, false});
+      font_class_used.push_back({false, false});
       return index;
     };
 
-    // The combined per-font class carrying `font-family:'odr-fN'` and the paint
-    // colour, so a font-bearing span names one class instead of restating the
-    // font family (interned) plus `.gv`/`.i` on every one of the millions of
-    // spans. A `nested` glyph layer additionally folds in the `.t .g` placement
-    // (absolute at the run origin, unselectable). Records the combo as used so
-    // only the rules that occur are emitted in <head>.
-    const auto font_class = [&](const std::uint32_t font, const bool inv,
-                                const bool nested) {
-      const int slot = nested ? (inv ? 3 : 2) : (inv ? 1 : 0);
-      font_class_used[font - 1][slot] = true;
-      const char *const prefix =
-          nested ? (inv ? "gi" : "gv") : (inv ? "fn" : "fv");
-      return prefix + std::to_string(font);
+    // The per-font class carrying `font-family:'odr-fN'` and the default paint
+    // colour (black when visible, transparent when invisible), so a line block
+    // names one class for its font instead of restating the family on every
+    // run. Records the variant as used so only the rules that occur are emitted
+    // in <head>.
+    const auto font_class = [&](const std::uint32_t font, const bool inv) {
+      font_class_used[font - 1][inv ? 1 : 0] = true;
+      return (inv ? "fn" : "fv") + std::to_string(font);
     };
 
     // A real-Unicode scalar may carry a `cmap` entry (letting its run collapse)
@@ -733,6 +746,19 @@ class HtmlServiceImpl final : public HtmlService {
       return std::move(s).str();
     };
 
+    // Markup-only escaping for run text. Unlike `escape_text` this keeps real
+    // U+0020 spaces (no `&nbsp;`): the line block carries `white-space:pre`, so
+    // runs of spaces survive layout *and* copy/search as ordinary spaces, which
+    // native word selection needs. The `data-g` glyph strings hold only PUA
+    // code points (U+E000..), so they never contain `&`, `<` or `>` and need no
+    // escaping.
+    const auto escape_markup = [](std::string s) {
+      util::string::replace_all(s, "&", "&amp;");
+      util::string::replace_all(s, "<", "&lt;");
+      util::string::replace_all(s, ">", "&gt;");
+      return s;
+    };
+
     // A run's baseline-to-top distance, in em, so it can be placed by its
     // baseline (PDF's text origin) rather than by its CSS box top — which is
     // one ascent above the baseline. Prefers the FontDescriptor `/Ascent`, then
@@ -796,6 +822,22 @@ class HtmlServiceImpl final : public HtmlService {
       GradientRegistry gradients(static_cast<std::uint32_t>(pages_out.size()));
       PatternRegistry patterns(static_cast<std::uint32_t>(pages_out.size()));
 
+      // --- Per-line flow state ---------------------------------------------
+      // Runs that share a baseline, font, size and spacing flow inline inside
+      // one absolutely-positioned line block; each run after the first is
+      // nudged by a gen-time `margin-left` equal to the gap from the previous
+      // run's right edge (signed, so kerning pull-backs work too). A painted
+      // path closes the open block (`close_line`) so the next text starts a
+      // fresh block *after* the path in `items`, preserving paint order. A
+      // rotated/skewed (matrix) run always gets its own one-run block.
+      int cur_line = -1;        // index of the open LineOut in `items`, or -1
+      std::string cur_flow_key; // flow key of the open block
+      bool prev_was_matrix = false;
+      double prev_end = 0;      // previous run's right edge, box points
+      double prev_baseline = 0; // previous run's baseline, box points
+      double prev_font_pt = 0;  // previous run's Tz-free em, points
+      const auto close_line = [&] { cur_line = -1; };
+
       for (const pdf::PageElement &element :
            pdf::extract_page(stream, *page->resources, *m_logger)) {
         // A painted path: serialize its subpaths to an SVG `<path>` fragment in
@@ -817,6 +859,7 @@ class HtmlServiceImpl final : public HtmlService {
           std::string fragment =
               svg_path_fragment(*path, to_box, clip_id, fill_url_id);
           if (!fragment.empty()) {
+            close_line();
             page_out.items.push_back(PathOut{std::move(fragment)});
           }
           continue;
@@ -836,6 +879,7 @@ class HtmlServiceImpl final : public HtmlService {
           std::string fragment =
               svg_shading_fragment(gradient_id, clip_id, width, height);
           if (!fragment.empty()) {
+            close_line();
             page_out.items.push_back(PathOut{std::move(fragment)});
           }
           continue;
@@ -847,6 +891,7 @@ class HtmlServiceImpl final : public HtmlService {
           const std::string clip_id = clips.register_clip(image->clip, to_box);
           std::string fragment = svg_image_fragment(*image, to_box, clip_id);
           if (!fragment.empty()) {
+            close_line();
             page_out.items.push_back(PathOut{std::move(fragment)});
           }
           continue;
@@ -897,172 +942,165 @@ class HtmlServiceImpl final : public HtmlService {
           }
         }
 
-        // Placement and spacing are shared by both layers of a run; build them
-        // once on `base`.
-        std::string base = "t";
-
-        // Place by the baseline: PDF's text origin (`m.e`, `m.f`) is the glyph
-        // baseline, but a CSS span anchors its box top, which sits one ascent
-        // above the baseline. Shift the origin up by the ascent along the run's
-        // local y axis so the baseline lands on the PDF origin.
+        // --- Run geometry (no class interning yet) ---------------------------
+        // A rotated/skewed run is placed by a CSS matrix and cannot flow; an
+        // upright uniform run is placed by left/top with its scale folded into
+        // the font size. `scale` is the glyph linear factor (folded into
+        // font-size when uniform, 1 when matrix, so Tc/Tw stay pre-transform).
+        const bool is_matrix = !(m.b == 0 && m.c == 0 && m.a == m.d);
         const double asc = ascent_em(text);
-
-        // Tc/Tw are absolute text-space lengths (not scaled by the font size).
-        // One text-space unit is `scale * pt_to_px` CSS px, where `scale` is
-        // the linear factor we apply to the glyphs: folded into `font-size` in
-        // the uniform branch, carried by the CSS matrix in the general branch
-        // (so spacing there is expressed pre-transform, scale == 1).
-        double scale;
-        if (m.b == 0 && m.c == 0 && m.a == m.d) {
-          // Upright uniform scale: fold the scale into the font size and place
-          // the origin with left/top, so the (otherwise near-universal) matrix
-          // is dropped. The ascent shift is purely vertical here (local y maps
-          // to box y, scaled by `m.a`).
-          add_class(base, "l", px_decl("left", round2(m.e * pt_to_px)));
-          add_class(
-              base, "t",
-              px_decl("top", round2((m.f - asc * m.a * text.size) * pt_to_px)));
-          add_class(base, "f",
-                    px_decl("font-size", round2(m.a * text.size * pt_to_px)));
-          scale = m.a;
+        const double scale = is_matrix ? 1.0 : m.a;
+        // Origin, baseline and horizontal extent in page-box points (y down).
+        // `text.width` lives in the text matrix's space; its box extent scales
+        // by the text-matrix -> box x-axis length. Tz (`horizontal_scaling`) is
+        // already folded into `text.width`, so divide it back out of the axis
+        // length to apply it once and keep `font_pt` the Tz-free em.
+        const double ox = m.e;
+        const double baseline = m.f;
+        const double tz = text.horizontal_scaling / 100.0;
+        const double axis = tz != 0 ? std::hypot(m.a, m.b) / tz : 0;
+        const double extent = text.width * axis;
+        const double font_pt = text.size * axis;
+        const double font_size_px =
+            round2((is_matrix ? text.size : m.a * text.size) * pt_to_px);
+        const double cs_px = round2(text.char_spacing * scale * pt_to_px);
+        const double ws_px = round2(text.word_spacing * scale * pt_to_px);
+
+        // --- Run payload -----------------------------------------------------
+        // Clean collapsible runs render the real Unicode directly in the
+        // embedded font (one selectable, searchable span). Unclean visible runs
+        // paint the glyphs via `content:attr(data-g)` generated content — kept
+        // out of the DOM text stream so they never break find mid-word — with
+        // the real Unicode riding alongside as a zero-width selectable overlay.
+        // `no_unicode` runs have only the glyph; invisible (Tr 3/7) and
+        // fallback runs render their Unicode as ordinary (transparent /
+        // fallback) text.
+        RunOut run;
+        run.color = color_suffix;
+        if (font == 0 || invisible) {
+          run.text = escape_markup(text.text);
         } else {
-          // The ascent shift is `asc` em down the local y axis, whose direction
-          // in the box is the matrix's (c, d) column; subtract it from the
-          // translation so the baseline, not the box top, lands on the origin.
-          const double ascent_px = asc * text.size * pt_to_px;
-          std::ostringstream tm;
-          tm << "transform:matrix(" << m.a << "," << m.b << "," << m.c << ","
-             << m.d << "," << round2(m.e * pt_to_px - m.c * ascent_px) << ","
-             << round2(m.f * pt_to_px - m.d * ascent_px) << ")";
-          add_class(base, "m", std::move(tm).str());
-          add_class(base, "f",
-                    px_decl("font-size", round2(text.size * pt_to_px)));
-          scale = 1;
-        }
-
-        // PDF char/word spacing (Tc/Tw) translate directly to CSS. TJ kerning
-        // needs no expression here: `extract_text` emits a separate segment per
-        // TJ string and folds the adjustment into the following segment's
-        // `transform`, so a segment only carries its constant spacing. Emitted
-        // only when non-zero to keep the (overwhelmingly common) unspaced span
-        // small.
-        //
-        // CSS letter-/word-spacing key on the *rendered* string's character and
-        // space boundaries, but PDF Tc/Tw advance the text matrix per raw code
-        // (Tw only on a simple font's single-byte 0x20; ISO 32000-1 9.3.3). The
-        // two coincide only when the rendered run is 1:1 with the codes. The
-        // glyph layer always is (one PUA code point per code, `font != 0`); the
-        // Unicode text layer is not when a /ToUnicode CMap expands a code into
-        // several characters (ligatures), /ActualText substitutes text, or a
-        // space was inferred — there CSS would insert gaps the segment advances
-        // never accounted for, splitting glyphs and drifting the next
-        // absolutely-positioned segment. Gate emission on that correspondence;
-        // word spacing additionally never applies to a composite font.
-        const bool spacing_one_to_one =
-            font != 0 ||
-            (text.font != nullptr &&
-             util::string::utf8_length(text.text) == text.advances.size());
-        if (text.char_spacing != 0 && spacing_one_to_one) {
-          add_class(base, "s",
-                    px_decl("letter-spacing",
-                            round2(text.char_spacing * scale * pt_to_px)));
-        }
-        if (text.word_spacing != 0 && spacing_one_to_one &&
-            !(text.font != nullptr && text.font->composite)) {
-          add_class(base, "w",
-                    px_decl("word-spacing",
-                            round2(text.word_spacing * scale * pt_to_px)));
-        }
-
-        // A run collapses to a single span — selectable *and* visible, the real
-        // Unicode rendered directly in the embedded font — when it has an
-        // embedded font, carries text, is 1:1 with its codes (no /ToUnicode
-        // expansion, /ActualText, or inferred space), and every glyph wins a
-        // real-Unicode `cmap` entry. The winner of a scalar is the first
-        // collapse-candidate run (in document order) to use it; processing
-        // order *is* document order, so an earlier run's claim is already
-        // visible and no later run can unseat it — the decision is final here.
-        const bool collapse_candidate =
-            font != 0 && !text.text.empty() && text.font != nullptr &&
-            util::string::utf8_length(text.text) == text.advances.size();
-
-        if (collapse_candidate) {
           // Stake first-wins real-Unicode -> glyph claims and decide collapse
-          // in one walk: the run collapses iff each code's glyph wins (or
-          // matches) its scalar. Claims are staked for every collapsible scalar
-          // even when the run ends up dual, so later runs see them. The
-          // post-pass only bakes the won scalars into the shared font's `cmap`.
-          std::map<char32_t, std::uint16_t> &won = used_unicode[font - 1];
-          bool collapse = true;
-          auto cp = text.text.begin();
-          for (const std::uint32_t code : text.font->codes(text.codes)) {
-            const char32_t uchar = utf8::unchecked::next(cp);
-            const std::uint16_t glyph = text.font->glyph_for_code(code);
-            if (!collapsible_unicode(uchar)) {
-              collapse = false;
-              continue;
-            }
-            const auto [it, inserted] = won.emplace(uchar, glyph);
-            if (!inserted && it->second != glyph) {
-              collapse = false;
+          // in one walk (in document order, so an earlier run's claim is final
+          // and visible to later runs). The post-pass bakes the won scalars
+          // into the shared font's `cmap`. A run is collapsible only when it is
+          // 1:1 with its codes (no /ToUnicode expansion, /ActualText, inferred
+          // space) and every glyph wins (or matches) its scalar.
+          bool collapse =
+              !text.text.empty() && text.font != nullptr &&
+              util::string::utf8_length(text.text) == text.advances.size();
+          if (collapse) {
+            std::map<char32_t, std::uint16_t> &won = used_unicode[font - 1];
+            auto cp = text.text.begin();
+            for (const std::uint32_t code : text.font->codes(text.codes)) {
+              const char32_t uchar = utf8::unchecked::next(cp);
+              const std::uint16_t glyph = text.font->glyph_for_code(code);
+              if (!collapsible_unicode(uchar)) {
+                collapse = false;
+                continue;
+              }
+              const auto [it, inserted] = won.emplace(uchar, glyph);
+              if (!inserted && it->second != glyph) {
+                collapse = false;
+              }
             }
           }
           if (collapse) {
-            // One span: the real Unicode rendered in the embedded font, named
-            // by the combined per-font class (black visible / transparent
-            // invisible), selectable either way.
-            std::string classes = std::move(base);
-            classes += ' ';
-            classes += font_class(font, invisible, /*nested=*/false);
-            classes += color_suffix;
-            page_out.items.push_back(
-                SpanOut{std::move(classes), escape_text(text.text), {}, {}});
+            run.text = escape_markup(text.text);
           } else {
-            // Dual layer (a glyph lost its scalar to an earlier one): a
-            // transparent selectable Unicode span with the PUA glyph layer
-            // nested inside, the latter folded into the combined `.gvN` /
-            // `.giN` class. The colour rides the visible (nested) layer.
-            page_out.items.push_back(SpanOut{
-                base + " i", escape_text(text.text),
-                font_class(font, invisible, /*nested=*/true) + color_suffix,
-                escape_text(glyph_run(*text.font, text.codes))});
+            run.glyph_data = glyph_run(*text.font, text.codes);
+            run.text =
+                escape_markup(text.text); // overlay (empty for no_unicode)
           }
-        } else if (font != 0) {
-          // The visible glyph layer: PUA code points in the embedded font,
-          // named by the combined per-font class (paint colour + font family).
-          std::string glyph_text =
-              escape_text(glyph_run(*text.font, text.codes));
-
-          if (!text.text.empty()) {
-            // Dual layer: a transparent selectable span carrying the real
-            // Unicode (for copy/search) with the glyph layer nested inside.
-            // The nested child overlays the run origin and inherits the
-            // placement via the combined `.gvN` / `.giN` class.
-            page_out.items.push_back(SpanOut{
-                base + " i", escape_text(text.text),
-                font_class(font, invisible, /*nested=*/true) + color_suffix,
-                std::move(glyph_text)});
+        }
+        if (run.text.empty() && run.glyph_data.empty()) {
+          continue; // nothing to paint or select (invisible no_unicode)
+        }
+
+        // --- Flow grouping ---------------------------------------------------
+        // Runs sharing font, visibility, size and spacing on one baseline flow
+        // in a single block; a matrix run (or the run after one), a flow-key
+        // change, a baseline change or a large backward jump opens a new block.
+        std::ostringstream fk;
+        fk << font << '|' << invisible << '|' << font_size_px << '|' << cs_px
+           << '|' << ws_px;
+        const std::string flow_key = std::move(fk).str();
+        bool new_line = is_matrix || prev_was_matrix || cur_line < 0 ||
+                        flow_key != cur_flow_key;
+        double margin_px = 0;
+        if (!new_line && prev_font_pt > 0) {
+          if (std::abs(baseline - prev_baseline) > 0.6 * prev_font_pt ||
+              ox < prev_end - 0.5 * prev_font_pt) {
+            new_line = true;
+          } else {
+            // Gen-time gap to the previous run's right edge (signed). This is
+            // the run's `margin-left`: the browser flows the previous run by
+            // its embedded-font advance, so this reproduces the PDF x-position
+            // (exact when the font's `hmtx` matches the PDF `/Widths`).
+            margin_px = round2((ox - prev_end) * pt_to_px);
+          }
+        }
+
+        if (new_line) {
+          // Build the block's placement classes (interned only here, per line).
+          std::string base = "t";
+          if (!is_matrix) {
+            add_class(base, "l", px_decl("left", round2(m.e * pt_to_px)));
+            add_class(base, "t",
+                      px_decl("top", round2((m.f - asc * m.a * text.size) *
+                                            pt_to_px)));
           } else {
-            // Display-only run: nothing is extractable (the `no_unicode` case),
-            // so the glyph layer stands alone and carries the placement itself
-            // (`base`), `.g` (unselectable) and the combined paint+font class.
-            std::string glyph_classes = base + " g ";
-            glyph_classes += font_class(font, invisible, /*nested=*/false);
-            glyph_classes += color_suffix;
-            page_out.items.push_back(SpanOut{
-                std::move(glyph_classes), std::move(glyph_text), {}, {}});
+            // The ascent shift is `asc` em down the local y axis, whose box
+            // direction is the matrix's (c, d) column; subtract it from the
+            // translation so the baseline, not the box top, lands on the
+            // origin.
+            const double ascent_px = asc * text.size * pt_to_px;
+            std::ostringstream tm;
+            tm << "transform:matrix(" << m.a << "," << m.b << "," << m.c << ","
+               << m.d << "," << round2(m.e * pt_to_px - m.c * ascent_px) << ","
+               << round2(m.f * pt_to_px - m.d * ascent_px) << ")";
+            add_class(base, "m", std::move(tm).str());
           }
+          add_class(base, "f", px_decl("font-size", font_size_px));
+          // Tc/Tw as CSS letter-/word-spacing (see the per-font notes). For an
+          // embedded font the run is always 1:1 with codes; for the fallback
+          // path only when the rendered string matches the advance count.
+          const bool spacing_one_to_one =
+              font != 0 ||
+              (text.font != nullptr &&
+               util::string::utf8_length(text.text) == text.advances.size());
+          if (text.char_spacing != 0 && spacing_one_to_one) {
+            add_class(base, "s", px_decl("letter-spacing", cs_px));
+          }
+          if (text.word_spacing != 0 && spacing_one_to_one &&
+              !(text.font != nullptr && text.font->composite)) {
+            add_class(base, "w", px_decl("word-spacing", ws_px));
+          }
+          if (font == 0 && invisible) {
+            base += " i"; // fallback path: transparent via `.i`
+          }
+
+          LineOut line;
+          line.classes = std::move(base);
+          if (font != 0) {
+            line.font_class = font_class(font, invisible);
+          }
+          line.runs.push_back(std::move(run));
+          page_out.items.push_back(std::move(line));
+          cur_line = static_cast<int>(page_out.items.size()) - 1;
+          cur_flow_key = flow_key;
         } else {
-          // Legacy single-layer path: no embedded font, render the Unicode in a
-          // fallback font.
-          std::string classes = base;
-          if (invisible) {
-            classes += " i";
+          if (margin_px != 0) {
+            run.margin = styles.intern("ml", px_decl("margin-left", margin_px));
           }
-          classes += color_suffix;
-          page_out.items.push_back(
-              SpanOut{std::move(classes), escape_text(text.text), {}, {}});
+          std::get<LineOut>(page_out.items[cur_line])
+              .runs.push_back(std::move(run));
         }
+
+        prev_end = ox + extent;
+        prev_baseline = baseline;
+        prev_font_pt = font_pt;
+        prev_was_matrix = is_matrix;
       }
 
       // Clip-path, gradient and pattern defs share the page's hidden
@@ -1094,37 +1132,26 @@ class HtmlServiceImpl final : public HtmlService {
       font_faces += "@font-face{font-family:'odr-f" + std::to_string(i + 1) +
                     "';src:url(" + url + ");}";
 
-      // The combined per-font classes for this font, only those used. `.fvN` /
-      // `.fnN` carry just the paint colour and font family (placement stays on
-      // the span's own classes); `.gvN` / `.giN` additionally fold in the
-      // nested glyph layer's `.t` placement and `.g` unselectability.
+      // The per-font classes for this font, only those used. `.fvN` / `.fnN`
+      // carry just the paint colour and font family; the line block owns the
+      // placement, so these are placement-free and serve the run text, the
+      // generated-content glyph and the overlay alike.
       const std::string n = std::to_string(i + 1);
       const std::string family = "font-family:'odr-f" + n + "'";
-      constexpr const char *placement =
-          "position:absolute;left:0;top:0;transform-origin:0 0;"
-          "white-space:pre;line-height:1;user-select:none;";
-      const auto rule = [&](const char *cls, const char *head,
-                            const char *color) {
+      const auto rule = [&](const char *cls, const char *color) {
         glyph_styles += '.';
         glyph_styles += cls;
         glyph_styles += n;
         glyph_styles += '{';
-        glyph_styles += head;
         glyph_styles += color;
         glyph_styles += family;
         glyph_styles += '}';
       };
       if (font_class_used[i][0]) {
-        rule("fv", "", "color:#000;");
+        rule("fv", "color:#000;");
       }
       if (font_class_used[i][1]) {
-        rule("fn", "", "color:transparent;");
-      }
-      if (font_class_used[i][2]) {
-        rule("gv", placement, "color:#000;");
-      }
-      if (font_class_used[i][3]) {
-        rule("gi", placement, "color:transparent;");
+        rule("fn", "color:transparent;");
       }
     }
 
@@ -1147,9 +1174,13 @@ class HtmlServiceImpl final : public HtmlService {
     out.out() << "body{margin:0;background:#525659}";
     out.out() << ".p{position:relative;margin:16px auto;background:#fff;"
                  "box-shadow:0 1px 4px rgba(0,0,0,.5)}";
+    // `.t` is the per-line block: an absolutely-positioned, shrink-to-fit
+    // container holding the line's run spans, which flow inline (each nudged by
+    // a `margin-left`). `white-space:pre` preserves the runs' real spaces for
+    // copy/search and stops inter-run collapsing.
     // `font-kerning:none` + `font-variant-ligatures:none` keep the browser from
-    // applying the embedded font's GPOS/GSUB tables. A collapsed run now emits
-    // real Unicode in that font, so without this a sequence like `fi`/`AV`
+    // applying the embedded font's GPOS/GSUB tables. A collapsed run emits real
+    // Unicode in that font, so without this a sequence like `fi`/`AV`
     // could be re-shaped (ligature substitution, kerning) after this code
     // already fixed the PDF glyph IDs and advances, shifting pixels and run
     // widths for otherwise 1:1 text. The PUA glyph layer was immune; restore
@@ -1164,10 +1195,21 @@ class HtmlServiceImpl final : public HtmlService {
     // Invisible text render modes (Tr 3/7): kept in the DOM for selection and
     // search (OCR-over-scan), but not painted.
     out.out() << ".i{color:transparent}";
-    // The display-only glyph layer (`no_unicode` runs) is not selectable, so
-    // the PUA code points stay off the clipboard; `.g` pairs with a combined
-    // `.fvN`/`.fnN` paint+font class on those spans.
-    out.out() << ".g{user-select:none}";
+    // Unclean glyphs (a lost cmap scalar, a /ToUnicode ligature expansion, an
+    // /ActualText substitution, an inferred space, or a `no_unicode` run): the
+    // PUA glyph is painted via generated content from the `data-g` attribute,
+    // so it is *not* in the DOM text stream — it never matches find, is never
+    // selected, and so cannot break a word mid-run. For the unclean cases that
+    // do carry text, the real Unicode rides alongside in a sibling `.ov`
+    // overlay.
+    out.out() << ".gl::before{content:attr(data-g)}";
+    // The selectable/searchable overlay carrying the real Unicode of an unclean
+    // run: zero width (the sibling glyph carries the advance) and invisible,
+    // but still found, selected and copied in reading order. `inline-block`
+    // lets `width:0` apply; the find-contiguity trade-off of an atomic inline
+    // box is a known follow-up (see SINGLE_LAYER_SELECTION_PLAN.md §3).
+    out.out() << ".ov{display:inline-block;width:0;overflow:hidden;"
+                 "color:transparent;vertical-align:baseline}";
     // Vector graphics: one or more `<svg>` overlays per page, each filling the
     // page box (viewBox in PDF points). `overflow:hidden` clips each overlay to
     // the page box, matching a PDF viewer: content drawn outside the MediaBox
@@ -1181,32 +1223,84 @@ class HtmlServiceImpl final : public HtmlService {
     // areas — the graphics are decorative, the text layer owns interaction.
     out.out() << ".s{position:absolute;left:0;top:0;width:100%;height:100%;"
                  "overflow:hidden;pointer-events:none}";
-    // Embedded fonts, re-encoded to the PUA and served inline.
+    // Embedded fonts, re-encoded with real-Unicode cmap entries for collapsed
+    // runs (PUA kept as a fallback) and served inline.
     out.out() << font_faces;
-    // Combined per-font classes (`.fvN`/`.fnN` paint+font, `.gvN`/`.giN` also
-    // placement), so a font-bearing span names one class for its font.
+    // Per-font paint+family classes (`.fvN` visible, `.fnN` invisible), carried
+    // on the line block; placement-free.
     out.out() << glyph_styles;
     // Per-value atomic classes (font sizes, offsets, transforms, ...).
     styles.write_rules(out.out());
     out.write_header_style_end();
     out.write_header_end();
 
-    const auto write_span = [&out](const SpanOut &span) {
-      // Inline so the whole run (and its nested glyph layer) stays on one line:
-      // smaller output and a more legible diff than the open/text/close split,
-      // while each run still gets its own line under the page div.
+    // One run's leading-span class list: the optional `margin-left` plus the
+    // optional colour override (`run.color` carries a leading space, stripped
+    // here), prefixed by `head` (`"gl"` for a generated-content glyph span).
+    const auto run_class = [](const RunOut &run, const char *head) {
+      std::string cls = head;
+      const auto add = [&](const std::string &t) {
+        if (t.empty()) {
+          return;
+        }
+        if (!cls.empty()) {
+          cls += ' ';
+        }
+        cls += t;
+      };
+      add(run.margin);
+      if (!run.color.empty()) {
+        add(run.color.substr(1)); // drop the leading space
+      }
+      return cls;
+    };
+
+    // Emit one line block as a positioned `<div>` (placement + font class)
+    // whose runs flow inline. The whole block is written tight (inline mode) so
+    // the pretty-printer inserts no whitespace between runs — `white-space:pre`
+    // on `.t` would otherwise turn it into real, shifting space.
+    const auto write_line = [&](const LineOut &line) {
+      std::string classes = line.classes;
+      if (!line.font_class.empty()) {
+        classes += ' ';
+        classes += line.font_class;
+      }
       out.write_element_begin(
-          "span",
-          HtmlElementOptions().set_inline(true).set_class(span.classes));
-      out.write_raw(span.text);
-      if (!span.glyph_classes.empty()) {
-        out.write_element_begin("span",
-                                HtmlElementOptions().set_inline(true).set_class(
-                                    span.glyph_classes));
-        out.write_raw(span.glyph_text);
-        out.write_element_end("span");
+          "div", HtmlElementOptions().set_inline(true).set_class(classes));
+      for (const RunOut &run : line.runs) {
+        if (run.glyph_data.empty()) {
+          // Clean / invisible / fallback run: the real Unicode renders
+          // directly. A class-less, margin-less run needs no wrapper — emit the
+          // text bare.
+          const std::string cls = run_class(run, "");
+          if (cls.empty()) {
+            out.write_raw(run.text);
+          } else {
+            out.write_element_begin(
+                "span", HtmlElementOptions().set_inline(true).set_class(cls));
+            out.write_raw(run.text);
+            out.write_element_end("span");
+          }
+        } else {
+          // Unclean run: the glyph painted via generated content (`data-g`,
+          // out of the text stream), then the zero-width selectable overlay
+          // carrying the real Unicode (omitted for a `no_unicode` run).
+          out.write_element_begin(
+              "span", HtmlElementOptions()
+                          .set_inline(true)
+                          .set_class(run_class(run, "gl"))
+                          .set_attributes(HtmlAttributesVector{
+                              {std::string("data-g"), run.glyph_data}}));
+          out.write_element_end("span");
+          if (!run.text.empty()) {
+            out.write_element_begin(
+                "span", HtmlElementOptions().set_inline(true).set_class("ov"));
+            out.write_raw(run.text);
+            out.write_element_end("span");
+          }
+        }
       }
-      out.write_element_end("span");
+      out.write_element_end("div");
     };
 
     out.write_body_begin();
@@ -1247,7 +1341,7 @@ class HtmlServiceImpl final : public HtmlService {
           out.write_raw(path->svg);
         } else {
           close_svg();
-          write_span(std::get<SpanOut>(item));
+          write_line(std::get<LineOut>(item));
         }
       }
       close_svg();
diff --git a/src/odr/internal/pdf/SINGLE_LAYER_SELECTION_PLAN.md b/src/odr/internal/pdf/SINGLE_LAYER_SELECTION_PLAN.md
new file mode 100644
index 00000000..0813fb2a
--- /dev/null
+++ b/src/odr/internal/pdf/SINGLE_LAYER_SELECTION_PLAN.md
@@ -0,0 +1,235 @@
+# Single-layer selection — design discussion & conclusions
+
+Status: **design only, not implemented.** This records a discussion about
+evolving the PDF→HTML text model from the current dual-layer scheme toward a
+mostly single-layer one (à la pdf2htmlEX) for native browser
+find/select/copy. No code in this direction has been written yet.
+
+## Background: where we are today
+
+- **Dual layer.** A visual glyph layer (`PageOut::items`, paint order,
+  PUA-re-encoded glyphs in the embedded font, `user-select:none`,
+  `aria-hidden`) plus a separate transparent selection layer (real Unicode,
+  `.i` = `color:transparent`).
+- **Uniform PUA re-encode (decision 2026-06-19).** Every glyph is re-encoded
+  to `U+E000 + glyph_id` for display; the extracted Unicode is carried
+  separately for the selection layer. Chosen to dodge cmap collision /
+  ligature / no_unicode problems and to decouple display fidelity from
+  text-extraction gaps.
+- **Per-line flow blocks (current branch `pdf-text-selection-layer`).** The
+  selection layer was changed from per-run absolutely-positioned spans to one
+  absolutely-positioned `<div>` per PDF line containing inline `<span>` runs
+  that flow naturally; inter-run gaps are carried as separator spans with
+  `data-w` (gap width), and an on-load JS pass fits each run by setting
+  `letter-spacing = (data-w − measured) / textContent.length`.
+
+The remaining weakness: the selection layer renders in an **unknown system
+font**, so its advances don't match the PDF, hence the runtime JS fit.
+
+## The question explored
+
+Should we drop the separate transparent selection layer and use a single
+layer that is simultaneously visualized and selected — like pdf2htmlEX, which
+uses one `<div>` per line with relatively-shifted spans (negative margins)?
+And how to handle ligatures / non-invertible Unicode mappings?
+
+## Conclusions
+
+### 1. Correct the font-vs-PDF advance with margins, not by rewriting `hmtx`
+
+- **Don't touch `hmtx`.** Stay with the pass-through-font philosophy (cmap
+  rewrite only, no outline surgery). Correct the difference between the
+  embedded font's advances and the PDF's desired x-positions with
+  pdf2htmlEX-style offset / negative-margin spans at run/word boundaries.
+- **The key enabler of a single layer:** the browser renders *our embedded
+  font*, whose advances we know exactly. So the correction is computable at
+  **generation time** (`desired_x − Σ embedded advances`) — no JS runtime
+  measurement. This is precisely what the transparent layer could never do,
+  because it rendered in an unknown system font.
+- Intra-word drift is normally zero because PDF `/Widths` are usually derived
+  from the font's own `hmtx`; corrections are only needed where they diverge
+  (subsetting, `Tz`, synthetic widths).
+- The **Q3 conflict** (`hmtx` holds one advance per glyph id, but a PDF can
+  require different advances for the same glyph) stops being a font problem:
+  it's just another margin correction at the occurrence.
+- Run-level `Tc`/`Tw`/`Tz` remain run-level CSS constants
+  (`letter-spacing`/`word-spacing`/scale), as today — not per-glyph.
+
+### 2. A single layer forces clean glyphs to carry *real* Unicode
+
+This is the non-obvious consequence and a **partial reversal of the uniform
+PUA decision**:
+
+- Browser find (Ctrl+F) indexes *rendered text runs*. `user-select:none` does
+  **not** exclude an element from find, and neither does `color:transparent`.
+- The only reason today's PUA glyphs are invisible to find is that their
+  codepoint is `U+E000+gid` — no real-word query matches them.
+- Therefore, for find/copy to work in a single layer, the **visible** glyph's
+  text content must *be* the real Unicode character, which means the embedded
+  font needs a **real-Unicode cmap entry** for that glyph (not a PUA one).
+
+So the model splits by glyph:
+
+- **Clean, unambiguous glyphs (the majority):** real-Unicode cmap entry → the
+  visible character is correctly shaped *and* real text → natively findable,
+  selectable, copyable, positioned by gen-time margins. Single layer, no
+  overlay.
+- **Ambiguous / ligature / no_unicode glyphs (the minority):** the visible
+  glyph must be kept **out of the DOM text stream entirely** (see §3) +
+  an overlay carrying the real Unicode.
+
+This is essentially pdf2htmlEX's strategy adapted to odr's font philosophy:
+still cmap-rewrite only, just building a real-Unicode cmap for the invertible
+subset instead of a blanket PUA cmap.
+
+### 3. Overlay for the unclean minority — and why `user-select:none` is not enough
+
+> **Correction to an earlier draft of this plan.** The first version said
+> the unclean glyph could stay PUA *DOM text* with `user-select:none`, and
+> that find would ignore it "for free" because no real query matches a
+> private-use codepoint. That is true only when the glyph sits at the *end* of
+> meaningful text. **Mid-word it breaks**, because the PUA codepoint is still
+> in the DOM text stream, wedged between the real characters.
+
+Take "final" where "fi" is a single ligature glyph. If the visible glyph is
+PUA *DOM text*, the browser's text stream is:
+
+```
+"fi" (overlay) + "\uE0xx" (PUA glyph) + "n" + "a" + "l"  →  "fi\uE0xxnal"
+```
+
+- **Search "final"** fails — `user-select:none` blocks *selection*, not
+  *find*; find still reads the PUA codepoint, so the word is no longer
+  contiguous.
+- **Double-click** — the word segmenter treats the PUA char as a boundary, so
+  you don't cleanly get "final".
+- **Triple-click** copies `"fi\uE0xxnal"` — the garbage codepoint lands on the
+  clipboard.
+
+So a mid-word PUA glyph rendered as DOM text is broken for exactly
+find / double-click / triple-click.
+
+**Fix: render the unclean glyph as CSS generated content, not DOM text.**
+`user-select:none` is too weak; `color:transparent` text is still found and
+selected. The only way to make a *visible* glyph invisible to find,
+word-segmentation, *and* selection is to take it out of the document text:
+
+```css
+.lig::before { content: "\e0xx"; }   /* embedded font on .lig; pointer-events:none */
+```
+
+Generated content (`::before`/`content`) is **not** part of the DOM text — not
+findable, not selectable, never on the clipboard. The `.lig` element itself is
+an *empty* span. So the only DOM text at that position is the overlapping
+transparent real-Unicode overlay, and the stream becomes contiguous:
+
+```
+"fi" (overlay, transparent) + "n" + "a" + "l"  →  "final"
+```
+
+With that:
+
+- **Search "final"** matches — the glyph contributes nothing; find concatenates
+  across sibling inline spans, so per-glyph "n"/"a"/"l" spans don't break it.
+- **Double-click** word-selects "final" and copies "final" — *provided the
+  click hit-tests onto the selectable overlay*. So the selectable transparent
+  text must be **on top**, with the visible glyph layer behind it and
+  `pointer-events:none`, so clicks fall through to the real text.
+- **Triple-click** selects the line block's selectable text in DOM order →
+  real text, glyph skipped.
+- **DOM order = reading order**, so drag-select copies in the right sequence.
+- **Truly unknown glyphs (no_unicode):** generated-content glyph, no overlay.
+  That text stays uncopyable/unsearchable — honest, since we don't know what
+  it says.
+
+#### Sizing the overlay: `width` vs `margin` + zero width
+
+Three jobs pull in different directions and **cannot all be satisfied for
+free**:
+
+1. supply real searchable/selectable text in reading position → wants **plain
+   `inline`**, in DOM order (so find concatenates it with neighbors);
+2. make the selection-highlight rectangle cover the glyph → wants a **sized
+   box** at the glyph's x;
+3. not disturb the layout advance — the glyph already supplies it via its
+   `::before` box → wants **zero net advance** on the overlay.
+
+The two shapes considered:
+
+- **`display:inline-block; width:<glyph_px>; overflow:hidden`** — satisfies (2)
+  and (3) directly and needs no runtime measurement (only the box matters; the
+  transparent text inside is irrelevant). *But* an atomic inline-block can
+  **break the find text run across its boundary** in some browsers, which
+  reintroduces the contiguity problem from above. So it trades find-correctness
+  for a perfect highlight box.
+- **plain `inline` + `width:0`** — `width` has **no effect on a non-replaced
+  inline** element, so this does nothing on its own. To zero a plain inline's
+  advance you must cancel its own rendered width with `margin-right:
+  -<own_width>`. Horizontal margins *do* apply to inline elements, so the
+  shift works — but `<own_width>` is the overlay text rendered in an **unknown
+  system font** (browsers fall back per-character when the embedded subset
+  lacks `f`/`i`), so the cancel value is not known at generation time →
+  runtime measurement, defeating the no-JS goal.
+
+So the honest statement: you can't get *plain-inline find-contiguity* +
+*zero advance* + *no runtime measurement* simultaneously. The candidates,
+each giving up one thing:
+
+- give up **no-measurement**: plain inline + a tiny JS `margin-right` cancel —
+  but only for the rare unclean glyphs (a small, local fit, not a whole-line
+  one);
+- give up **contiguity-guarantee**: inline-block box + verify on target
+  browsers that find still matches across it (it may — needs testing);
+- give up **system-font fallback**: ship a controlled blank/zero glyph in the
+  overlay font and a cmap entry for the overlay codepoints so the overlay
+  renders in *our* font with a **known** advance, cancelable at gen time with
+  no measurement — at the cost of more font work.
+
+For find/copy correctness the overlay width is irrelevant (only DOM text +
+order matter); width only affects the highlight rectangle. So if forced to
+choose, prefer the contiguity-safe `inline` shape and accept a loose highlight
+over a ligature. **This needs empirical testing on the target browsers before
+the shape is fixed.**
+
+Net effect: the dual-layer complexity shrinks from "everything" to "the rare
+unclean glyphs," which become a small *local* overlay (generated-content glyph
++ overlapping transparent real-Unicode text). The runtime JS fit disappears
+for the clean majority.
+
+## Resulting architecture (proposed)
+
+- Build a real-Unicode cmap for the embedded subset where the Unicode→glyph
+  mapping is unambiguous and invertible.
+- Those glyphs render from real codepoints → single findable/selectable layer,
+  positioned by gen-time negative margins computed from embedded-font
+  advances. No JS.
+- Ambiguous / ligature / no_unicode glyphs render the glyph via CSS
+  generated content (out of the DOM text stream, `pointer-events:none`) with
+  an overlapping transparent real-Unicode overlay where the Unicode is known;
+  truly unknown glyphs get no overlay. The overlay's box shape
+  (`inline` vs `inline-block`) is an open trade-off — see §3.
+
+## The real work hiding here
+
+The cmap-builder's **collision / eligibility policy**: deciding, per glyph,
+whether a clean real-Unicode entry is possible or it falls back to
+PUA + overlay. Before committing, the thing worth measuring is how big the
+clean majority actually is across the corpus:
+
+- how often the embedded `hmtx` already equals the PDF `/Widths` (→ how often
+  margins are even needed),
+- how often the same glyph carries conflicting widths (Q3 frequency),
+- how often Unicode→glyph is non-invertible (→ overlay frequency).
+
+That ratio decides whether we're mostly in the correction-free single-layer
+path or frequently emitting corrections/overlays.
+
+## Open / not decided
+
+- Whether to proceed at all, vs. keeping the current dual layer with the JS
+  fit.
+- The eligibility-pass sketch against the current font path (offered, not yet
+  done).
+- The overlay box shape for unclean glyphs (`inline` vs `inline-block` vs a
+  controlled overlay font) — needs browser testing of find-contiguity vs
+  highlight-box quality (§3).