diff --git a/CHANGELOG.md b/CHANGELOG.md index 351ace52bf..b59b2810e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Detect AWS SDK `Apache5HttpClient` in `AwsSdk2Transport` body-method guardrail ([#1903](https://github.com/opensearch-project/opensearch-java/pull/1970)) - Support Jackson 3.x release line ([#1810](https://github.com/opensearch-project/opensearch-java/pull/1810)) - Added `equals()` and `hashCode()` implementations to `FieldValue` ([#1998](https://github.com/opensearch-project/opensearch-java/pull/1998)) +- Add document lifecycle guide and runnable sample ([#2017](https://github.com/opensearch-project/opensearch-java/pull/2017)) ### Fixed diff --git a/guides/document_lifecycle.md b/guides/document_lifecycle.md new file mode 100644 index 0000000000..2e040d0fc4 --- /dev/null +++ b/guides/document_lifecycle.md @@ -0,0 +1,294 @@ +- [Document Lifecycle](#document-lifecycle) + - [Setup](#setup) + - [Index a document with an ID](#index-a-document-with-an-id) + - [Handle duplicate documents](#handle-duplicate-documents) + - [Index or replace a document](#index-or-replace-a-document) + - [Index a document with an auto-generated ID](#index-a-document-with-an-auto-generated-id) + - [Get a document](#get-a-document) + - [Filter source fields](#filter-source-fields) + - [Get multiple documents](#get-multiple-documents) + - [Check whether a document exists](#check-whether-a-document-exists) + - [Update a document](#update-a-document) + - [Update a document with a script](#update-a-document-with-a-script) + - [Update documents by query](#update-documents-by-query) + - [Reindex documents](#reindex-documents) + - [Delete a document](#delete-a-document) + - [Delete documents by query](#delete-documents-by-query) + - [Clean up](#clean-up) + +# Document Lifecycle + +This guide covers common document lifecycle operations with the OpenSearch Java client: indexing, retrieving, updating, 
reindexing, and deleting documents. + +You can find a working version of the code in [DocumentLifecycle.java](../samples/src/main/java/org/opensearch/client/samples/DocumentLifecycle.java). + +## Setup + +Create a client and the indices used by the examples below. + +```java +final HttpHost[] hosts = new HttpHost[] { + new HttpHost("http", "localhost", 9200) + }; + +final OpenSearchTransport transport = ApacheHttpClient5TransportBuilder + .builder(hosts) + .setMapper(new JacksonJsonpMapper()) + .build(); +OpenSearchClient client = new OpenSearchClient(transport); + +String index = "movies-document-lifecycle"; +String reindexedIndex = "movies-document-lifecycle-reindexed"; + +client.indices().create(c -> c.index(index)); +``` + +The examples use this `Movie` document class. + +```java +public static class Movie { + private String title; + private Integer year; + + public Movie() {} + + public Movie(String title, Integer year) { + this.title = title; + this.year = year; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public Integer getYear() { + return year; + } + + public void setYear(Integer year) { + this.year = year; + } +} +``` + +## Index a document with an ID + +Use the create API when the document must not already exist. OpenSearch returns an error if another document already has the same ID. + +```java +Movie movie = new Movie("Beauty and the Beast", 1991); + +CreateResponse response = client.create( + c -> c.index(index) + .id("1") + .document(movie) + .refresh(Refresh.WaitFor) +); +``` + +## Handle duplicate documents + +A second create request with the same ID returns a `409` conflict. Keep expected errors inside a `try/catch` block so the sample can keep running. Depending on the transport and error conversion path, the conflict may be raised as a transport `ResponseException` or as an `OpenSearchException`.
+ +```java +try { + client.create(c -> c.index(index).id("1").document(new Movie("Beauty and the Beast", 1991))); +} catch (ResponseException e) { + if (e.status() != 409) { + throw e; + } +} catch (OpenSearchException e) { + if (e.status() != 409) { + throw e; + } +} +``` + +## Index or replace a document + +Use the index API when you want to create or replace a document. If a document with that ID already exists, OpenSearch replaces the stored document. + +```java +IndexResponse response = client.index( + i -> i.index(index) + .id("1") + .document(new Movie("Beauty and the Beast: Special Edition", 2002)) + .refresh(Refresh.WaitFor) +); +``` + +## Index a document with an auto-generated ID + +If you do not provide an ID, OpenSearch generates one and returns it in the index response. + +```java +IndexResponse response = client.index( + i -> i.index(index) + .document(new Movie("The Lion King", 1994)) + .refresh(Refresh.WaitFor) +); + +String generatedId = response.id(); +``` + +## Get a document + +Use the get API to retrieve a document by index and ID. + +```java +GetResponse<Movie> response = client.get(g -> g.index(index).id("1"), Movie.class); + +if (response.found()) { + Movie movie = response.source(); +} +``` + +## Filter source fields + +Use source includes or excludes to control which fields OpenSearch returns in `_source`. + +```java +GetResponse<Movie> titleOnly = client.get( + g -> g.index(index) + .id("1") + .sourceIncludes("title"), + Movie.class +); + +GetResponse<Movie> withoutYear = client.get( + g -> g.index(index) + .id("1") + .sourceExcludes("year"), + Movie.class +); +``` + +## Get multiple documents + +Use the multi get API to retrieve several documents in one request.
+ +```java +MgetResponse<Movie> response = client.mget( + m -> m.index(index).ids("1", generatedId), + Movie.class +); + +for (MultiGetResponseItem<Movie> item : response.docs()) { + if (item.isResult() && item.result().found()) { + Movie movie = item.result().source(); + } +} +``` + +## Check whether a document exists + +Use the exists API when you only need to know whether a document is present. + +```java +boolean exists = client.exists(e -> e.index(index).id("1")).value(); +``` + +## Update a document + +Use the update API with a partial document to change selected fields. + +```java +UpdateRequest<Movie, Map<String, Object>> request = + new UpdateRequest.Builder<Movie, Map<String, Object>>() + .index(index) + .id("1") + .doc(Map.of("year", (Object) 1995)) + .refresh(Refresh.WaitFor) + .build(); + +UpdateResponse<Movie> response = client.update(request, Movie.class); +``` + +## Update a document with a script + +Use an inline script when the update should be computed from the current document state. + +```java +UpdateRequest<Movie, Object> request = new UpdateRequest.Builder<Movie, Object>() + .index(index) + .id("1") + .script(s -> s.inline(i -> i.source("ctx._source.year += 5"))) + .refresh(Refresh.WaitFor) + .build(); + +UpdateResponse<Movie> response = client.update(request, Movie.class); +``` + +## Update documents by query + +Use update by query to update every document that matches a query. + +```java +client.index( + i -> i.index(index) + .id("future") + .document(new Movie("Future Movie", 2025)) + .refresh(Refresh.WaitFor) +); + +Query newerThan2023 = Query.of( + q -> q.range(r -> r.field("year").gt(JsonData.of(2023))) +); + +UpdateByQueryResponse response = client.updateByQuery( + u -> u.index(index) + .query(newerThan2023) + .script(s -> s.inline(i -> i.source("ctx._source.year -= 1"))) + .refresh(Refresh.True) +); +``` + +## Reindex documents + +Use reindex to copy documents from one index to another.
+ +```java +ReindexResponse response = client.reindex( + r -> r.source(s -> s.index(index)) + .dest(d -> d.index(reindexedIndex)) + .refresh(Refresh.True) + .waitForCompletion(true) +); +``` + +## Delete a document + +Use the delete API to remove one document by ID. + +```java +DeleteResponse response = client.delete( + d -> d.index(index) + .id("1") + .refresh(Refresh.WaitFor) +); +``` + +## Delete documents by query + +Use delete by query to remove every document that matches a query. + +```java +DeleteByQueryResponse response = client.deleteByQuery( + d -> d.index(index) + .query(newerThan2023) + .refresh(Refresh.True) +); +``` + +## Clean up + +Delete the sample indexes when you are done. + +```java +client.indices().delete(d -> d.index(reindexedIndex)); +client.indices().delete(d -> d.index(index)); +``` diff --git a/samples/src/main/java/org/opensearch/client/samples/DocumentLifecycle.java b/samples/src/main/java/org/opensearch/client/samples/DocumentLifecycle.java new file mode 100644 index 0000000000..6e8183201b --- /dev/null +++ b/samples/src/main/java/org/opensearch/client/samples/DocumentLifecycle.java @@ -0,0 +1,273 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.client.samples; + +import java.util.Map; +import org.apache.hc.core5.http.HttpHost; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.client.json.JsonData; +import org.opensearch.client.json.jackson.JacksonJsonpMapper; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch._types.OpenSearchException; +import org.opensearch.client.opensearch._types.Refresh; +import org.opensearch.client.opensearch._types.Result; +import org.opensearch.client.opensearch._types.query_dsl.Query; +import org.opensearch.client.opensearch.core.UpdateRequest; +import org.opensearch.client.transport.OpenSearchTransport; +import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder; +import org.opensearch.client.transport.httpclient5.ResponseException; + +/** + * Run with: {@code ./gradlew :samples:run -Dsamples.mainClass=DocumentLifecycle} + */ +public class DocumentLifecycle { + private static final Logger LOGGER = LogManager.getLogger(DocumentLifecycle.class); + private static final String INDEX_NAME = "movies-document-lifecycle"; + private static final String REINDEXED_INDEX_NAME = "movies-document-lifecycle-reindexed"; + + public static void main(String[] args) throws Exception { + final HttpHost[] hosts = new HttpHost[] { new HttpHost("http", "localhost", 9200) }; + + final OpenSearchTransport transport = ApacheHttpClient5TransportBuilder.builder(hosts).setMapper(new JacksonJsonpMapper()).build(); + OpenSearchClient client = new OpenSearchClient(transport); + + var version = client.info().version(); + LOGGER.info("Server: {}@{}", version.distribution(), version.number()); + + deleteIndexIfExists(client, REINDEXED_INDEX_NAME); + deleteIndexIfExists(client, INDEX_NAME); + + try { + createIndex(client); + createDocumentWithId(client); + handleDuplicateCreate(client); + indexOrReplaceDocument(client); + var generatedId = 
createDocumentWithAutoGeneratedId(client); + getDocument(client); + filterSourceFields(client); + getMultipleDocuments(client, generatedId); + checkDocumentExists(client); + updateDocument(client); + updateDocumentWithScript(client); + addFutureMovie(client); + updateDocumentsByQuery(client); + reindexDocuments(client); + deleteDocument(client); + deleteDocumentsByQuery(client); + } finally { + deleteIndexIfExists(client, REINDEXED_INDEX_NAME); + deleteIndexIfExists(client, INDEX_NAME); + } + } + + private static void createIndex(OpenSearchClient client) throws Exception { + LOGGER.info("Creating index {}", INDEX_NAME); + client.indices().create(c -> c.index(INDEX_NAME)); + } + + private static void createDocumentWithId(OpenSearchClient client) throws Exception { + var movie = new Movie("Beauty and the Beast", 1991); + var response = client.create(c -> c.index(INDEX_NAME).id("1").document(movie).refresh(Refresh.WaitFor)); + + require(response.result() == Result.Created, "Expected create to create document 1"); + LOGGER.info("Created document {} with result {}", response.id(), response.result()); + } + + private static void handleDuplicateCreate(OpenSearchClient client) throws Exception { + try { + client.create(c -> c.index(INDEX_NAME).id("1").document(new Movie("Beauty and the Beast", 1991))); + throw new IllegalStateException("Expected duplicate create to fail"); + } catch (ResponseException e) { + require(e.status() == 409, "Expected duplicate create to return a 409 status"); + LOGGER.info("Duplicate create returned the expected {} status", e.status()); + } catch (OpenSearchException e) { + require(e.status() == 409, "Expected duplicate create to return a 409 status"); + LOGGER.info("Duplicate create returned the expected {} status", e.status()); + } + } + + private static void indexOrReplaceDocument(OpenSearchClient client) throws Exception { + var movie = new Movie("Beauty and the Beast: Special Edition", 2002); + var response = client.index(i -> 
i.index(INDEX_NAME).id("1").document(movie).refresh(Refresh.WaitFor)); + + require(response.result() == Result.Updated, "Expected index to replace document 1"); + LOGGER.info("Indexed document {} with result {}", response.id(), response.result()); + } + + private static String createDocumentWithAutoGeneratedId(OpenSearchClient client) throws Exception { + var response = client.index(i -> i.index(INDEX_NAME).document(new Movie("The Lion King", 1994)).refresh(Refresh.WaitFor)); + + require(response.id() != null && !response.id().isEmpty(), "Expected an auto-generated ID"); + LOGGER.info("Created document with auto-generated ID {}", response.id()); + return response.id(); + } + + private static void getDocument(OpenSearchClient client) throws Exception { + var response = client.get(g -> g.index(INDEX_NAME).id("1"), Movie.class); + + require(response.found(), "Expected document 1 to exist"); + require("Beauty and the Beast: Special Edition".equals(response.source().getTitle()), "Expected document 1 title to match"); + LOGGER.info("Fetched document {}: {}", response.id(), response.source()); + } + + private static void filterSourceFields(OpenSearchClient client) throws Exception { + var titleOnly = client.get(g -> g.index(INDEX_NAME).id("1").sourceIncludes("title"), Movie.class); + require(titleOnly.found(), "Expected source-filtered document 1 to exist"); + require(titleOnly.source().getTitle() != null, "Expected title to be returned"); + require(titleOnly.source().getYear() == null, "Expected year to be filtered out"); + + var withoutYear = client.get(g -> g.index(INDEX_NAME).id("1").sourceExcludes("year"), Movie.class); + require(withoutYear.found(), "Expected source-excluded document 1 to exist"); + require(withoutYear.source().getYear() == null, "Expected year to be excluded"); + LOGGER.info("Fetched document 1 with source filtering"); + } + + private static void getMultipleDocuments(OpenSearchClient client, String generatedId) throws Exception { + var response = 
client.mget(m -> m.index(INDEX_NAME).ids("1", generatedId), Movie.class); + var foundDocuments = response.docs().stream().filter(item -> item.isResult() && item.result().found()).count(); + + require(foundDocuments == 2, "Expected mget to return two documents"); + LOGGER.info("Multi get returned {} documents", foundDocuments); + } + + private static void checkDocumentExists(OpenSearchClient client) throws Exception { + var exists = client.exists(e -> e.index(INDEX_NAME).id("1")).value(); + + require(exists, "Expected document 1 to exist"); + LOGGER.info("Document 1 exists"); + } + + private static void updateDocument(OpenSearchClient client) throws Exception { + UpdateRequest> request = new UpdateRequest.Builder>().index(INDEX_NAME) + .id("1") + .doc(Map.of("year", (Object) 1995)) + .refresh(Refresh.WaitFor) + .build(); + var response = client.update(request, Movie.class); + + require(response.result() == Result.Updated, "Expected partial update to update document 1"); + require(getMovie(client, "1").getYear() == 1995, "Expected document 1 year to be 1995"); + LOGGER.info("Updated document {} with result {}", response.id(), response.result()); + } + + private static void updateDocumentWithScript(OpenSearchClient client) throws Exception { + UpdateRequest request = new UpdateRequest.Builder().index(INDEX_NAME) + .id("1") + .script(s -> s.inline(i -> i.source("ctx._source.year += 5"))) + .refresh(Refresh.WaitFor) + .build(); + var response = client.update(request, Movie.class); + + require(response.result() == Result.Updated, "Expected script update to update document 1"); + require(getMovie(client, "1").getYear() == 2000, "Expected document 1 year to be 2000"); + LOGGER.info("Updated document {} with a script", response.id()); + } + + private static void addFutureMovie(OpenSearchClient client) throws Exception { + client.index(i -> i.index(INDEX_NAME).id("future").document(new Movie("Future Movie", 2025)).refresh(Refresh.WaitFor)); + } + + private static void 
updateDocumentsByQuery(OpenSearchClient client) throws Exception { + var response = client.updateByQuery( + u -> u.index(INDEX_NAME) + .query(yearGreaterThan(2023)) + .script(s -> s.inline(i -> i.source("ctx._source.year -= 1"))) + .refresh(Refresh.True) + ); + + require(response.failures().isEmpty(), "Expected update by query to have no failures"); + require(response.updated() != null && response.updated() == 1, "Expected update by query to update one document"); + require(getMovie(client, "future").getYear() == 2024, "Expected future movie year to be 2024"); + LOGGER.info("Updated {} documents by query", response.updated()); + } + + private static void reindexDocuments(OpenSearchClient client) throws Exception { + var response = client.reindex( + r -> r.source(s -> s.index(INDEX_NAME)).dest(d -> d.index(REINDEXED_INDEX_NAME)).refresh(Refresh.True).waitForCompletion(true) + ); + + require(response.failures().isEmpty(), "Expected reindex to have no failures"); + require(client.get(g -> g.index(REINDEXED_INDEX_NAME).id("future"), Movie.class).found(), "Expected reindexed document to exist"); + LOGGER.info("Reindexed {} documents", response.created()); + } + + private static void deleteDocument(OpenSearchClient client) throws Exception { + var response = client.delete(d -> d.index(INDEX_NAME).id("1").refresh(Refresh.WaitFor)); + + require(response.result() == Result.Deleted, "Expected document 1 to be deleted"); + require(!client.exists(e -> e.index(INDEX_NAME).id("1")).value(), "Expected document 1 to no longer exist"); + LOGGER.info("Deleted document {}", response.id()); + } + + private static void deleteDocumentsByQuery(OpenSearchClient client) throws Exception { + var response = client.deleteByQuery(d -> d.index(INDEX_NAME).query(yearGreaterThan(2023)).refresh(Refresh.True)); + + require(response.failures().isEmpty(), "Expected delete by query to have no failures"); + require(response.deleted() != null && response.deleted() == 1, "Expected delete by query to 
delete one document"); + require(!client.exists(e -> e.index(INDEX_NAME).id("future")).value(), "Expected future movie to be deleted"); + LOGGER.info("Deleted {} documents by query", response.deleted()); + } + + private static Movie getMovie(OpenSearchClient client, String id) throws Exception { + var response = client.get(g -> g.index(INDEX_NAME).id(id), Movie.class); + require(response.found(), "Expected document " + id + " to exist"); + return response.source(); + } + + private static Query yearGreaterThan(int year) { + return Query.of(q -> q.range(r -> r.field("year").gt(JsonData.of(year)))); + } + + private static void deleteIndexIfExists(OpenSearchClient client, String indexName) throws Exception { + if (client.indices().exists(e -> e.index(indexName)).value()) { + LOGGER.info("Deleting index {}", indexName); + client.indices().delete(d -> d.index(indexName)); + } + } + + private static void require(boolean condition, String message) { + if (!condition) { + throw new IllegalStateException(message); + } + } + + public static class Movie { + private String title; + private Integer year; + + public Movie() {} + + public Movie(String title, Integer year) { + this.title = title; + this.year = year; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public Integer getYear() { + return year; + } + + public void setYear(Integer year) { + this.year = year; + } + + @Override + public String toString() { + return String.format("Movie{title='%s', year=%s}", title, year); + } + } +}