OpenSearch Integration
1. Add the dependencies
2. Initialize the RestHighLevelClient and BulkProcessor
3. Insert, update, and delete operations
3.1 Preparing the data
3.2 Asynchronous single-document insert
3.3 Synchronous single-document insert
3.4 Bulk insert
3.5 Update
3.6 Update by query
3.7 Bulk update
3.8 Delete
3.9 Delete by query
4. Bulk operations
4.1 Overview
4.2 Java implementation
1. Add the dependencies
<!-- Resolves: java.lang.NoClassDefFoundError: org/elasticsearch/common/xcontent/DeprecationHandler -->
<!-- elasticsearch -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>7.5.1</version>
</dependency>
<!-- elasticsearch-rest-client -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-client</artifactId>
<version>7.5.1</version>
</dependency>
<!-- elasticsearch-rest-high-level-client -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.5.1</version>
<exclusions>
<exclusion>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-client</artifactId>
</exclusion>
<exclusion>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
</exclusion>
</exclusions>
</dependency>
2. Initialize the RestHighLevelClient and BulkProcessor
RestHighLevelClient is the officially recommended client API. The alternative, TransportClient, has been deprecated and will be removed entirely in Elasticsearch 8.0, so it can no longer be used from 8.0 onward.
@Slf4j
public class EsTest {
// Elasticsearch client
private static RestHighLevelClient restHighLevelClient;
// Processor for batched (bulk) writes
private static BulkProcessor bulkProcessor;
static {
List<HttpHost> httpHosts = new ArrayList<>();
// Cluster node addresses
httpHosts.add(new HttpHost("172.26.17.11", 9200));
httpHosts.add(new HttpHost("172.26.17.11", 9201));
httpHosts.add(new HttpHost("172.26.17.11", 9202));
// Build the client builder from the host list
RestClientBuilder builder = RestClient.builder(httpHosts.toArray(new HttpHost[0]));
builder.setRequestConfigCallback(requestConfigBuilder -> {
requestConfigBuilder.setConnectTimeout(1000);
requestConfigBuilder.setSocketTimeout(1000);
requestConfigBuilder.setConnectionRequestTimeout(1000);
return requestConfigBuilder;
});
// Username and password credentials
final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials("userName", "password"));
builder.setHttpClientConfigCallback(httpClientBuilder -> {
httpClientBuilder.setMaxConnTotal(30);
httpClientBuilder.setMaxConnPerRoute(30);
httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
return httpClientBuilder;
});
restHighLevelClient = new RestHighLevelClient(builder);
}
static {
bulkProcessor=createBulkProcessor();
}
private static BulkProcessor createBulkProcessor() {
BulkProcessor.Listener listener = new BulkProcessor.Listener() {
@Override
public void beforeBulk(long executionId, BulkRequest request) {
log.info("1. 【beforeBulk】批次[{}] 携带 {} 请求数量", executionId, request.numberOfActions());
}
@Override
public void afterBulk(long executionId, BulkRequest request,
BulkResponse response) {
if (!response.hasFailures()) {
log.info("2. 【afterBulk-成功】批量 [{}] 完成在 {} ms", executionId, response.getTook().getMillis());
} else {
BulkItemResponse[] items = response.getItems();
for (BulkItemResponse item : items) {
if (item.isFailed()) {
log.info("2. 【afterBulk-失败】批量 [{}] 出现异常的原因 : {}", executionId, item.getFailureMessage());
break;
}
}
}
}
@Override
public void afterBulk(long executionId, BulkRequest request,
Throwable failure) {
List<DocWriteRequest<?>> requests = request.requests();
List<String> esIds = requests.stream().map(DocWriteRequest::id).collect(Collectors.toList());
log.error("3. 【afterBulk-failure失败】es执行bluk失败,失败的esId为:{}", esIds, failure);
}
};
BulkProcessor.Builder builder = BulkProcessor.builder(((bulkRequest, bulkResponseActionListener) -> {
restHighLevelClient.bulkAsync(bulkRequest, RequestOptions.DEFAULT, bulkResponseActionListener);
}), listener);
// Flush once 10,000 actions have accumulated
builder.setBulkActions(10000);
// Flush once the buffered requests reach 8 MB
builder.setBulkSize(new ByteSizeValue(8L, ByteSizeUnit.MB));
// Flush every 10 seconds regardless of size
builder.setFlushInterval(TimeValue.timeValueSeconds(10));
// Number of concurrent bulk requests allowed
builder.setConcurrentRequests(8);
// Retry policy: constant 1 s back-off, at most 3 retries
builder.setBackoffPolicy(BackoffPolicy.constantBackoff(TimeValue.timeValueSeconds(1), 3));
return builder.build();
}
}
The whole project can share a single BulkProcessor; its flush policies (action count, buffer size, flush interval) control when buffered data is pushed from memory into Elasticsearch.
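As a usage sketch (a hypothetical helper, not part of the original code): documents are queued with add(), flushed according to the rules configured above, and on shutdown the processor should be flushed and closed so nothing left in the buffer is lost. The sketch assumes the static bulkProcessor above and java.util.concurrent.TimeUnit.

public static void addAllAndShutdown(List<DemoDto> docs) throws InterruptedException {
    // Queue the documents; the BulkProcessor sends them once a flush condition is met
    docs.forEach(d -> bulkProcessor.add(
            new IndexRequest("test_demo")
                    .id(d.getId() + "")
                    .source(JSON.toJSONString(d), XContentType.JSON)));
    // Push out anything still buffered in memory
    bulkProcessor.flush();
    // On application shutdown: wait up to 30 s for in-flight bulk requests to complete
    bulkProcessor.awaitClose(30, TimeUnit.SECONDS);
}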
3. Insert, update, and delete operations
3.1 Preparing the data
PUT test_demo
PUT test_demo/_mapping
{
"properties":{
"title":{
"type":"text"
},
"tag":{
"type":"keyword"
},
"publishTime":{
"type":"date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
}
}
}
GET test_demo/_search
{
"query": {
"match_all": {}
}
}
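The index and mapping can also be created from Java instead of the REST console. A minimal sketch, assuming the RestHighLevelClient from section 2 and org.elasticsearch.client.indices.CreateIndexRequest; the mapping JSON mirrors the DSL above.

public static void createTestDemoIndex() throws IOException {
    String mapping = "{"
            + "\"properties\":{"
            + "  \"title\":{\"type\":\"text\"},"
            + "  \"tag\":{\"type\":\"keyword\"},"
            + "  \"publishTime\":{\"type\":\"date\","
            + "    \"format\":\"yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis\"}"
            + "}}";
    // Create the index with the mapping in one request
    CreateIndexRequest request = new CreateIndexRequest("test_demo")
            .mapping(mapping, XContentType.JSON);
    restHighLevelClient.indices().create(request, RequestOptions.DEFAULT);
}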
3.2 Asynchronous single-document insert
public static void testAsyncSingle() {
IndexRequest indexRequest = new IndexRequest("test_demo");
DemoDto demoDto = new DemoDto(2001L, "印度新冠疫情失控", "世界", new Date());
indexRequest.source(JSON.toJSONString(demoDto), XContentType.JSON);
indexRequest.timeout(TimeValue.timeValueSeconds(1));
indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.WAIT_UNTIL);
//OpType.INDEX: insert the document, overwriting any existing one with the same id
indexRequest.create(false);
indexRequest.id(demoDto.getId() + "");
restHighLevelClient.indexAsync(indexRequest, RequestOptions.DEFAULT, new ActionListener<IndexResponse>() {
@Override
public void onResponse(IndexResponse indexResponse) {
ReplicationResponse.ShardInfo shardInfo = indexResponse.getShardInfo();
if (shardInfo.getFailed() > 0) {
for (ReplicationResponse.ShardInfo.Failure failure : shardInfo.getFailures()) {
log.error("将id为:{}的数据存入ES时存在失败的分片,原因为:{}", indexRequest.id(), failure.getCause());
}
}
}
@Override
public void onFailure(Exception e) {
log.error("{}:存储es时异常,数据信息为", indexRequest.id(), e);
}
});
}
3.3 Synchronous single-document insert
public static void testSingleAdd() throws IOException {
IndexRequest indexRequest = new IndexRequest("test_demo");
DemoDto demoDto = new DemoDto(3001L, "es单数据同步插入2", "IT", new Date());
indexRequest.source(JSON.toJSONString(demoDto), XContentType.JSON);
indexRequest.id("3001");
indexRequest.timeout(TimeValue.timeValueSeconds(1));
indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.WAIT_UNTIL);
indexRequest.create(true);
indexRequest.id(demoDto.getId() + "");
restHighLevelClient.index(indexRequest, RequestOptions.DEFAULT);
}
3.3.1 indexRequest.id(demoDto.getId() + "");
填充"_id"字段。
3.3.2 indexRequest.create(true);
Sets the operation type:
public IndexRequest create(boolean create) {
if (create) {
return opType(OpType.CREATE);
} else {
return opType(OpType.INDEX);
}
}
OpType.CREATE: if a document with the same _id already exists, the insert fails with an exception.
OpType.INDEX: if a document with the same _id already exists, the insert overwrites it.
The exception thrown for a duplicate id under OpType.CREATE shows that Elasticsearch uses optimistic concurrency control (document versions) to resolve concurrent write conflicts:
Elasticsearch exception [type=version_conflict_engine_exception, reason=[3001]: version conflict, document already exists (current version [3])]
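To observe this from Java, a small sketch (hypothetical test method, assuming org.elasticsearch.ElasticsearchStatusException is imported): with create(true), indexing the same id a second time fails with the version-conflict error above, which surfaces as an ElasticsearchStatusException.

public static void testCreateConflict() throws IOException {
    DemoDto demoDto = new DemoDto(3001L, "duplicate create test", "IT", new Date());
    IndexRequest indexRequest = new IndexRequest("test_demo")
            .id(demoDto.getId() + "")
            .source(JSON.toJSONString(demoDto), XContentType.JSON)
            .create(true); // OpType.CREATE: reject if the id already exists
    try {
        restHighLevelClient.index(indexRequest, RequestOptions.DEFAULT);
    } catch (ElasticsearchStatusException e) {
        // version_conflict_engine_exception: document already exists
        log.warn("Document {} already exists, create was rejected", indexRequest.id(), e);
    }
}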
3.4 Bulk insert
/**
* Bulk insert via the shared BulkProcessor
*/
public static void testBatch() {
List<IndexRequest> indexRequests = new ArrayList<>();
ArrayList<DemoDto> demoDtos = new ArrayList<>();
demoDtos.add(new DemoDto(1001L, "中国是中国人的中国", "中国", new Date()));
demoDtos.add(new DemoDto(1002L, "2008年奥运会", "体育", new Date()));
demoDtos.forEach(e -> {
IndexRequest request = new IndexRequest("test_demo");
//Document id
request.id(e.getId() + "");
//Document body as JSON
request.source(JSON.toJSONString(e), XContentType.JSON);
request.opType(DocWriteRequest.OpType.CREATE);
indexRequests.add(request);
});
indexRequests.forEach(bulkProcessor::add);
}
3.5 Update
For an update, the doc must be passed as a Map rather than a JSON string; otherwise an exception is thrown.
public static void testSingleUpdate() throws IOException {
UpdateRequest updateRequest = new UpdateRequest("test_demo", "3001");
Map<String, Object> kvs = new HashMap<>();
kvs.put("title", "es单数据更新啦!");
updateRequest.doc(kvs);
updateRequest.timeout(TimeValue.timeValueSeconds(1));
updateRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.WAIT_UNTIL);
//Partial update of the existing document
restHighLevelClient.update(updateRequest, RequestOptions.DEFAULT);
}
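If you do want to send the partial document as a JSON string, the overload that also takes an explicit XContentType should work as well (a sketch under that assumption):

public static void testSingleUpdateJson() throws IOException {
    UpdateRequest updateRequest = new UpdateRequest("test_demo", "3001");
    // Passing XContentType.JSON tells the client how to parse the string
    updateRequest.doc("{\"title\":\"updated from a JSON string\"}", XContentType.JSON);
    restHighLevelClient.update(updateRequest, RequestOptions.DEFAULT);
}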
3.6 Update by query
public static void testSingleUpdateQuery() throws IOException {
UpdateByQueryRequest updateByQueryRequest = new UpdateByQueryRequest();
updateByQueryRequest.indices("test_demo");
updateByQueryRequest.setQuery(new TermQueryBuilder("id", 3001));
updateByQueryRequest.setScript(new Script(ScriptType.INLINE,
"painless",
"ctx._source.tag='电脑'", Collections.emptyMap()));
//Update every document matching the query via a painless script
restHighLevelClient.updateByQuery(updateByQueryRequest, RequestOptions.DEFAULT);
}
3.7 Bulk update
/**
* Bulk update via the shared BulkProcessor
*/
private static void testBatchUpdate() {
List<UpdateRequest> updateRequests=new ArrayList<>();
//Documents to update (only the ids are needed here)
List<DemoDto> params=new ArrayList<>();
params.add(new DemoDto(2001L));
params.add(new DemoDto(3001L));
params.forEach(e->{
//Build one update request per document
UpdateRequest updateRequest = new UpdateRequest();
updateRequest.index("test_demo");
//Id of the document to update
updateRequest.id(e.getId()+"");
//Fields to update
Map<String,Object> map=new HashMap<>();
map.put("title","美国社会动荡");
updateRequest.doc(map);
updateRequests.add(updateRequest);
});
updateRequests.forEach(bulkProcessor::add);
}
3.8 Delete
/**
* Delete a single document
*/
private static void testSingleDel() throws IOException {
DeleteRequest deleteRequest=new DeleteRequest();
deleteRequest.index("test_demo");
deleteRequest.id("3001");
restHighLevelClient.delete(deleteRequest,RequestOptions.DEFAULT);
}
3.9 Delete by query
/**
* Delete by query
*/
private static void testSingleDelQuery() throws IOException {
DeleteByQueryRequest deleteByQueryRequest=new DeleteByQueryRequest();
deleteByQueryRequest.indices("test_demo");
deleteByQueryRequest.setQuery(new MatchQueryBuilder("title","国年"));
//The match query analyzes the text, so every document whose title matches the analyzed terms is deleted
restHighLevelClient.deleteByQuery(deleteByQueryRequest,RequestOptions.DEFAULT);
}
4. Bulk operations
4.1 Overview
The OpenSearch Bulk API is an efficient way to modify data in batches: a single request can carry multiple index, update, or delete operations.
Operation types:
index: insert or replace a document.
create: insert only if the document does not already exist.
update: partially update a document.
delete: delete a document.
Example
POST /_bulk
{ "index" : { "_index" : "test", "_id" : "1" } }
{ "field1" : "value1" }
{ "delete" : { "_index" : "test", "_id" : "2" } }
{ "create" : { "_index" : "test", "_id" : "3" } }
{ "field1" : "value3" }
{ "update" : { "_index" : "test", "_id" : "1" } }
{ "doc" : { "field2" : "value2" } }
4.2 Java implementation
<dependency>
<groupId>org.opensearch.client</groupId>
<artifactId>opensearch-rest-high-level-client</artifactId>
<version>1.2.4</version> <!-- Pick the version that matches your OpenSearch cluster -->
</dependency>
<dependency>
<groupId>org.opensearch</groupId>
<artifactId>opensearch</artifactId>
<version>1.2.4</version> <!-- Pick the version that matches your OpenSearch cluster -->
</dependency>
import org.opensearch.action.bulk.BulkRequest;
import org.opensearch.action.bulk.BulkResponse;
import org.opensearch.action.delete.DeleteRequest;
import org.opensearch.action.index.IndexRequest;
import org.opensearch.client.RequestOptions;
import org.opensearch.client.RestHighLevelClient;
import org.opensearch.client.indices.CreateIndexRequest;
import org.opensearch.client.indices.CreateIndexResponse;
import org.opensearch.common.xcontent.XContentType;
import org.opensearch.client.RestClient;
import org.opensearch.client.RestClientBuilder;
import org.opensearch.client.indices.GetIndexRequest;
import org.apache.http.HttpHost;
import java.io.IOException;
import java.util.List;
public class OpenSearchBulkExample {
private static final String INDEX_NAME = "efficiency_statistic_index";
public static void main(String[] args) {
// Create the OpenSearch client
RestHighLevelClient client = new RestHighLevelClient(
RestClient.builder(new HttpHost("localhost", 9200, "http"))
);
// Sample input data
List<EfficiencyStatistic> cmsEfficiencyList = getCmsEfficiencyList();
try {
// Create the index if it does not exist yet
createIndexIfNotExists(client);
// Build the bulk request
BulkRequest bulkRequest = new BulkRequest();
// Add a delete and an index operation for every record
for (EfficiencyStatistic efficiencyStatistic : cmsEfficiencyList) {
// Delete operation
DeleteRequest deleteRequest = new DeleteRequest(INDEX_NAME, efficiencyStatistic.getContractId());
bulkRequest.add(deleteRequest);
// Index (insert) operation
IndexRequest indexRequest = new IndexRequest(INDEX_NAME)
.id(efficiencyStatistic.getContractId())
.source(convertToJson(efficiencyStatistic), XContentType.JSON);
bulkRequest.add(indexRequest);
}
// Execute the bulk request
BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
// Handle the response
if (bulkResponse.hasFailures()) {
System.out.println("Bulk request had failures: " + bulkResponse.buildFailureMessage());
} else {
System.out.println("Bulk request completed successfully.");
}
} catch (IOException e) {
e.printStackTrace();
} finally {
// Close the client
try {
client.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
private static void createIndexIfNotExists(RestHighLevelClient client) throws IOException {
    // Skip creation if the index already exists
    boolean exists = client.indices().exists(new GetIndexRequest(INDEX_NAME), RequestOptions.DEFAULT);
    if (exists) {
        return;
    }
    CreateIndexRequest request = new CreateIndexRequest(INDEX_NAME);
    CreateIndexResponse createIndexResponse = client.indices().create(request, RequestOptions.DEFAULT);
    if (!createIndexResponse.isAcknowledged()) {
        System.out.println("Index creation not acknowledged.");
    }
}
private static String convertToJson(EfficiencyStatistic efficiencyStatistic) {
// Convert the EfficiencyStatistic object to a JSON string
// A library such as Jackson or Gson could be used here
// For example:
// ObjectMapper mapper = new ObjectMapper();
// return mapper.writeValueAsString(efficiencyStatistic);
return "{\"contractId\":\"" + efficiencyStatistic.getContractId() + "\",\"otherField\":\"" + efficiencyStatistic.getOtherField() + "\"}";
}
private static List<EfficiencyStatistic> getCmsEfficiencyList() {
// Return test data
// Replace with real data as appropriate
return List.of(
new EfficiencyStatistic("contract1", "value1"),
new EfficiencyStatistic("contract2", "value2")
);
}
}
class EfficiencyStatistic {
private String contractId;
private String otherField;
public EfficiencyStatistic(String contractId, String otherField) {
this.contractId = contractId;
this.otherField = otherField;
}
public String getContractId() {
return contractId;
}
public String getOtherField() {
return otherField;
}
}
License:
CC BY 4.0