From fd1eec1c725b3a26244dbe5ed1d66e4a4cf3c089 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1rton=20Kardos=C3=98?= <power.up1163@gmail.com>
Date: Tue, 30 Jun 2026 15:02:39 +0200
Subject: [PATCH 1/3] Fixed top documents update in SensTopic

---
 turftopic/models/senstopic.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/turftopic/models/senstopic.py b/turftopic/models/senstopic.py
index e7382f1..b97320f 100644
--- a/turftopic/models/senstopic.py
+++ b/turftopic/models/senstopic.py
@@ -279,13 +279,11 @@ def partial_fit(
                         *self.topic_names[-n_new_components:],
                     ]
             console.log("Updated term importances")
-            self.top_documents.extend(
-                self.get_top_documents(
-                    raw_documents,
-                    document_topic_matrix=doc_topic[:, -n_new_components:],
+            for new_dt in doc_topic[:, -n_new_components:].T:
+                top = np.argsort(-new_dt)
+                self.top_documents.append(
+                    [raw_documents[i_top] for i_top in top]
                 )
-            )
-            self.document_topic_matrix = doc_topic
             console.log("Model update done.")
         return self
 

From c34d845fbda1c9b7b6b500b7cbbf0125411879a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1rton=20Kardos=C3=98?= <power.up1163@gmail.com>
Date: Tue, 30 Jun 2026 16:15:36 +0200
Subject: [PATCH 2/3] Added dynamic online topic modelling to SensTopic, fixed
 errors

---
 turftopic/dynamic.py          |   4 +-
 turftopic/models/_snmf.py     |   5 +-
 turftopic/models/senstopic.py | 112 ++++++++++++++++++++++++++--------
 3 files changed, 92 insertions(+), 29 deletions(-)

diff --git a/turftopic/dynamic.py b/turftopic/dynamic.py
index bd15be6..8d2da8f 100644
--- a/turftopic/dynamic.py
+++ b/turftopic/dynamic.py
@@ -29,8 +29,8 @@ def bin_timestamps(
         # Have to substract one, else it starts from one
         return np.digitize(unix_timestamps, unix_bins) - 1, bins
     else:
-        # Adding one day, so that the maximum value is still included.
-        max_timestamp = max(timestamps) + timedelta(days=1)
+        # Adding one microsecond, so that the maximum value is still included.
+        max_timestamp = max(timestamps) + timedelta(microseconds=1)
         unix_bins = np.histogram_bin_edges(unix_timestamps, bins=bins)
         unix_bins[-1] = max_timestamp.timestamp()
         bins = [datetime.fromtimestamp(ts) for ts in unix_bins]
diff --git a/turftopic/models/_snmf.py b/turftopic/models/_snmf.py
index 96bf84e..22b327e 100644
--- a/turftopic/models/_snmf.py
+++ b/turftopic/models/_snmf.py
@@ -199,13 +199,14 @@ def fit_timeslice(self, X_t: np.ndarray, G_t: np.ndarray):
         F = update_F(X_t.T, G_t, F=None)
         return F.T
 
-    def transform(self, X: np.ndarray):
+    def transform(self, X: np.ndarray, F=None):
         G = init_G(
             X.T,
             n_components=self.n_components,
             random_state=self.random_state,
         )
-        F = self.components_.T
+        if F is None:
+            F = self.components_.T
         update = jit(lambda G: update_G(X.T, G, F, sparsity=self.sparsity))
         error_at_init = rec_err(X.T, F, G)
         prev_error = error_at_init
diff --git a/turftopic/models/senstopic.py b/turftopic/models/senstopic.py
index b97320f..3a99246 100644
--- a/turftopic/models/senstopic.py
+++ b/turftopic/models/senstopic.py
@@ -1,4 +1,4 @@
-from datetime import datetime
+from datetime import datetime, timedelta
 from functools import partial
 from typing import Literal, Optional, Union
 
@@ -217,7 +217,7 @@ def update_vocabulary(self, raw_documents):
             set(new_vectorizer.get_feature_names_out()) - set(old_vocab)
         )
         if len(new_vocab) == 0:
-            return
+            return []
         new_vocab_embeddings = self.encode_documents(new_vocab)
         self.vocab_embeddings = np.concatenate(
             [self.vocab_embeddings, new_vocab_embeddings], axis=0
@@ -225,12 +225,38 @@ def update_vocabulary(self, raw_documents):
         self.vectorizer.get_feature_names_out = lambda: np.array(
             list(old_vocab) + new_vocab
         )
+        return new_vocab
 
     def partial_fit(
-        self, raw_documents, y=None, embeddings=None, n_new_components="auto"
+        self,
+        raw_documents,
+        y=None,
+        embeddings=None,
+        timestamps=None,
+        n_new_components="auto",
     ):
+        if timestamps is not None:
+            if (getattr(self, "components_", None) is None) or (
+                getattr(self, "time_bin_edges", None) is None
+            ):
+                return self.fit_transform_dynamic(
+                    raw_documents,
+                    embeddings=embeddings,
+                    timestamps=timestamps,
+                    bins=1,
+                )
         if getattr(self, "components_", None) is None:
-            return self.fit(raw_documents, embeddings=embeddings)
+            if timestamps is None:
+                return self.fit(raw_documents, embeddings=embeddings)
+        if timestamps is not None:
+            last_edge = self.time_bin_edges[-1]
+            is_before = [(ts <= last_edge) for ts in timestamps]
+            n_before = np.sum(is_before)
+            if n_before:
+                raise ValueError(
+                    "When using partial fitting on a dynamic model, all new documents have to be in a new time slice. "
+                    f"Currently there are {n_before} documents from before {last_edge}. Remove these before fitting."
+                )
         console = Console()
         with console.status("Updating model with new data") as status:
             if embeddings is None:
@@ -253,10 +279,11 @@ def partial_fit(
             )
             self.n_components_ = self.decomposition.n_components
             doc_topic = self.decomposition.transform(embeddings)
-            console.log("Updated model")
+            console.log(f"Updated model with {n_new_components} topics.")
             status.update("Updating vocabulary")
-            self.update_vocabulary(raw_documents)
-            console.log("Updated vocabulary")
+            new_vocab = self.update_vocabulary(raw_documents)
+            n_new_vocab = len(new_vocab)
+            console.log(f"Updated vocabulary with {n_new_vocab} items.")
             status.update("Estimating term importances")
             vocab_topic = self.decomposition.transform(self.vocab_embeddings)
             self.axial_components_ = vocab_topic.T
@@ -284,6 +311,36 @@ def partial_fit(
                 self.top_documents.append(
                     [raw_documents[i_top] for i_top in top]
                 )
+            if timestamps is not None:
+                status.update("Updating temporal components.")
+                self.time_bin_edges.append(
+                    max(timestamps) + timedelta(microseconds=1)
+                )
+                t_components = []
+                t_importance = []
+                for t_component, t_imp in zip(
+                    self.axial_temporal_components_, self.temporal_importance_
+                ):
+                    t_component = np.pad(
+                        t_component,
+                        [(0, n_new_components), (0, n_new_vocab)],
+                        mode="constant",
+                        constant_values=0,
+                    )
+                    t_imp = np.pad(
+                        t_imp,
+                        (0, n_new_components),
+                        mode="constant",
+                        constant_values=0,
+                    )
+                    t_components.append(t_component)
+                    t_importance.append(t_imp)
+                new_imp, new_comp = self._fit_timebin(embeddings, doc_topic)
+                t_components.append(new_comp)
+                t_importance.append(new_imp)
+                self.axial_temporal_components_ = np.stack(t_components)
+                self.temporal_importance_ = np.stack(t_importance)
+                self.estimate_components(self.feature_importance)
             console.log("Model update done.")
         return self
 
@@ -371,6 +428,13 @@ def fit_transform_multimodal(
             console.log("Images transformed")
         return doc_topic
 
+    def _fit_timebin(self, t_X, t_dt):  # fit one time slice; returns (t_imp, t_components_)
+        t_imp = t_dt.mean(axis=0)  # mean of t_dt along axis 0
+        t_F = self.decomposition.fit_timeslice(t_X, t_dt).T  # fit_timeslice returns F.T, so .T recovers F
+        t_G = self.decomposition.transform(self.vocab_embeddings, F=t_F)  # use the slice's F rather than the default self.components_.T
+        t_components_ = t_G.T  # transposed back before being returned
+        return t_imp, t_components_
+
     def fit_transform_dynamic(
         self,
         raw_documents,
@@ -378,9 +442,14 @@ def fit_transform_dynamic(
         embeddings: Optional[np.ndarray] = None,
         bins: Union[int, list[datetime]] = 10,
     ) -> np.ndarray:
-        document_topic_matrix = self.fit_transform(
-            raw_documents, embeddings=embeddings
-        )
+        if getattr(self, "components_", None) is None:
+            document_topic_matrix = self.fit_transform(
+                raw_documents, embeddings=embeddings
+            )
+        else:
+            document_topic_matrix = self.transform(
+                raw_documents, embeddings=embeddings
+            )
         time_labels, self.time_bin_edges = self.bin_timestamps(
             timestamps, bins
         )
@@ -392,22 +461,15 @@ def fit_transform_dynamic(
             dtype=self.components_.dtype,
         )
         self.temporal_importance_ = np.zeros((n_bins, n_comp))
-        # doc_topic = np.dot(X, self.components_.T)
         for i_timebin in np.unique(time_labels):
-            topic_importances = document_topic_matrix[
-                time_labels == i_timebin
-            ].mean(axis=0)
-            self.temporal_importance_[i_timebin, :] = topic_importances
-            t_doc_topic = document_topic_matrix[time_labels == i_timebin]
-            t_embeddings = self.embeddings[time_labels == i_timebin]
-            t_components = self.decomposition.fit_timeslice(
-                t_embeddings, t_doc_topic
-            )
-            ax_t = np.maximum(
-                self.vocab_embeddings @ np.linalg.pinv(t_components), 0
-            )
-            self.axial_temporal_components_[i_timebin, :, :] = ax_t.T
-        self.estimate_components(self.feature_importance)
+            t_dt = document_topic_matrix[time_labels == i_timebin]
+            t_X = self.embeddings[time_labels == i_timebin]
+            t_imp, t_comp = self._fit_timebin(t_X, t_dt)
+            self.temporal_importance_[i_timebin, :] = t_imp
+            self.axial_temporal_components_[i_timebin, :, :] = t_comp
+        self.estimate_components(
+            self.feature_importance,
+        )
         return document_topic_matrix
 
     @property

From 0d18d2beb361b2ee82c15b6e6e990c775af5afcc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1rton=20Kardos=C3=98?= <power.up1163@gmail.com>
Date: Tue, 30 Jun 2026 16:16:29 +0200
Subject: [PATCH 3/3] Version bump

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1269899..eded9c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,7 @@ profile = "black"
 
 [project]
 name = "turftopic"
-version = "0.26.0"
+version = "0.26.1"
 description = "Topic modeling with contextual representations from sentence transformers."
 authors = [
    { name = "Márton Kardos <power.up1163@gmail.com>", email = "martonkardos@cas.au.dk" }