From 93a8fd5dc13796421d37a90180a3bb00abafabb6 Mon Sep 17 00:00:00 2001 From: wenjinbo <599483010@qq.com> Date: Tue, 26 Aug 2025 16:17:17 +0800 Subject: [PATCH] =?UTF-8?q?feat:=E6=96=B0=E5=A2=9E=E6=99=BA=E8=83=BD?= =?UTF-8?q?=E6=A3=80=E7=B4=A2,=E5=85=A8=E5=BA=93=E5=85=B3=E9=94=AE?= =?UTF-8?q?=E5=AD=97=E6=A3=80=E7=B4=A2=E5=8F=8A=E6=96=87=E6=A1=A3=E9=A2=84?= =?UTF-8?q?=E8=A7=88=E5=92=8C=E4=B8=8B=E8=BD=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- chat-client/src/api/dataset.ts | 10 +- chat-client/src/config/net.config.js | 2 +- .../views/chatweb/intelRetrieval/index.vue | 1243 +++++++++++++++++ .../controller/KnowledgeBaseController.java | 2 + .../bjtds/brichat/entity/dto/DocumentDto.java | 8 +- .../brichat/entity/dto/KnowledgeBaseDto.java | 2 +- .../bjtds/brichat/entity/dto/RecordDto.java | 10 +- .../bjtds/brichat/entity/dto/SegmentDto.java | 30 +- .../dify/impl/DifyDatasetApiServiceImpl.java | 24 +- .../impl/KnowledgeBaseServiceImpl.java | 94 +- .../com/bjtds/brichat/util/RetrievalUtil.java | 70 +- .../main/resources/application-beijing.yml | 1 + .../src/main/resources/application-wuhan.yml | 2 + 13 files changed, 1428 insertions(+), 70 deletions(-) create mode 100644 chat-client/src/views/chatweb/intelRetrieval/index.vue diff --git a/chat-client/src/api/dataset.ts b/chat-client/src/api/dataset.ts index c172975..9bd3335 100644 --- a/chat-client/src/api/dataset.ts +++ b/chat-client/src/api/dataset.ts @@ -148,4 +148,12 @@ export const deleteDataset = (id: string) => { url: '/brichat-service/datasetManage/document/deepAnalysisList', method: 'get' }) - } \ No newline at end of file + } + +export function retrieval(data: { query: string; searchMethod: string }) { + return request({ + url: `/brichat-service/knowledge-base/retrieval`, + method: 'post', + data + }) +} diff --git a/chat-client/src/config/net.config.js b/chat-client/src/config/net.config.js index 4aa9801..45ce29e 100644 --- a/chat-client/src/config/net.config.js +++ b/chat-client/src/config/net.config.js @@ -17,7 +17,7 @@ module.exports = { // 配后端数据的接收方式application/json;charset=UTF-8 或 application/x-www-form-urlencoded;charset=UTF-8 contentType: 'application/json;charset=UTF-8', // 最长请求时间 - requestTimeout: 10000, + requestTimeout: 20000, // 操作正常code,支持String、Array、int多种类型 successCode: [200, 0, '200', '0'], // 数据状态的字段名称 diff --git a/chat-client/src/views/chatweb/intelRetrieval/index.vue b/chat-client/src/views/chatweb/intelRetrieval/index.vue new file mode 100644 index 0000000..d2bfa9e --- /dev/null +++ b/chat-client/src/views/chatweb/intelRetrieval/index.vue @@ -0,0 +1,1243 @@ + + + + + diff --git a/chat-server/src/main/java/com/bjtds/brichat/controller/KnowledgeBaseController.java b/chat-server/src/main/java/com/bjtds/brichat/controller/KnowledgeBaseController.java index f20b3bf..62f2baf 100644 --- a/chat-server/src/main/java/com/bjtds/brichat/controller/KnowledgeBaseController.java +++ b/chat-server/src/main/java/com/bjtds/brichat/controller/KnowledgeBaseController.java @@ -29,6 +29,8 @@ public class KnowledgeBaseController { @PostMapping("/retrieval") public ResultUtils retrieval(@RequestBody KnowledgeBaseDto knowledgeBaseDto) throws Exception{ List retrievalResult = knowledgeBaseService.retrieval(knowledgeBaseDto); + + return ResultUtils.success(retrievalResult); } diff --git a/chat-server/src/main/java/com/bjtds/brichat/entity/dto/DocumentDto.java b/chat-server/src/main/java/com/bjtds/brichat/entity/dto/DocumentDto.java index 2505b22..057fe83 100644 --- a/chat-server/src/main/java/com/bjtds/brichat/entity/dto/DocumentDto.java +++ b/chat-server/src/main/java/com/bjtds/brichat/entity/dto/DocumentDto.java @@ -1,17 +1,17 @@ package com.bjtds.brichat.entity.dto; -import com.fasterxml.jackson.annotation.JsonProperty; +import com.alibaba.fastjson.annotation.JSONField; import lombok.Data; @Data public class DocumentDto { private String id; - @JsonProperty("data_source_type") + @JSONField(name = "data_source_type") private String dataSourceType; private String name; - @JsonProperty("doc_type") + @JSONField(name = "doc_type") private String docType; - @JsonProperty("doc_metadata") + @JSONField(name = "doc_metadata") private String docMetadata; } diff --git a/chat-server/src/main/java/com/bjtds/brichat/entity/dto/KnowledgeBaseDto.java b/chat-server/src/main/java/com/bjtds/brichat/entity/dto/KnowledgeBaseDto.java index b9396fe..044e984 100644 --- a/chat-server/src/main/java/com/bjtds/brichat/entity/dto/KnowledgeBaseDto.java +++ b/chat-server/src/main/java/com/bjtds/brichat/entity/dto/KnowledgeBaseDto.java @@ -7,7 +7,7 @@ import lombok.Data; public class KnowledgeBaseDto { private String query; - @JsonProperty("search_method") + private String searchMethod; } diff --git a/chat-server/src/main/java/com/bjtds/brichat/entity/dto/RecordDto.java b/chat-server/src/main/java/com/bjtds/brichat/entity/dto/RecordDto.java index 3ca1c98..a89771c 100644 --- a/chat-server/src/main/java/com/bjtds/brichat/entity/dto/RecordDto.java +++ b/chat-server/src/main/java/com/bjtds/brichat/entity/dto/RecordDto.java @@ -1,15 +1,17 @@ package com.bjtds.brichat.entity.dto; -import com.fasterxml.jackson.annotation.JsonProperty; +import com.alibaba.fastjson.annotation.JSONField; import lombok.Data; @Data public class RecordDto { - + /**分段信息*/ + @JSONField(name = "segment") private SegmentDto segmentDto; - @JsonProperty("child_chunks") + @JSONField(name = "child_chunks") private String childChunks; + /**置信度* 例如0.99*/ private String score; - @JsonProperty("tsne_position") + @JSONField(name = "tsne_position") private String tsnePosition; } diff --git a/chat-server/src/main/java/com/bjtds/brichat/entity/dto/SegmentDto.java b/chat-server/src/main/java/com/bjtds/brichat/entity/dto/SegmentDto.java index fd2a7bc..4368ee3 100644 --- a/chat-server/src/main/java/com/bjtds/brichat/entity/dto/SegmentDto.java +++ b/chat-server/src/main/java/com/bjtds/brichat/entity/dto/SegmentDto.java @@ -1,6 +1,6 @@ package com.bjtds.brichat.entity.dto; -import com.fasterxml.jackson.annotation.JsonProperty; +import com.alibaba.fastjson.annotation.JSONField; import lombok.Data; import java.util.List; @@ -10,39 +10,41 @@ public class SegmentDto { private String id; private Integer position; - @JsonProperty("document_id") + @JSONField(name = "document_id") private String documentId; private String content; - @JsonProperty("sign_content") + @JSONField(name = "sign_content") private String signContent; private String answer; - @JsonProperty("word_count") + @JSONField(name = "word_count") private Integer wordCount; private Integer tokens; private List keywords; - @JsonProperty("index_node_id") + @JSONField(name = "index_node_id") private String indexNodeId; - @JsonProperty("index_node_hash") + @JSONField(name = "index_node_hash") private String indexNodeHash; - @JsonProperty("hit_count") + @JSONField(name = "hit_count") private Integer hitCount; private Boolean enabled; - @JsonProperty("disabled_at") + @JSONField(name = "disabled_at") private String disabledAt; - @JsonProperty("disabled_by") + @JSONField(name = "disabled_by") private String disabledBy; private String status; - @JsonProperty("created_at") + @JSONField(name = "created_at") private String createdAt; - @JsonProperty("created_by") + @JSONField(name = "created_by") private String createdBy; - @JsonProperty("indexing_at") + @JSONField(name = "indexing_at") private String indexingAt; - @JsonProperty("completed_at") + @JSONField(name = "completed_at") private String completedAt; private String error; - @JsonProperty("stopped_at") + @JSONField(name = "stopped_at") private String stoppedAt; + + @JSONField(name = "document") private DocumentDto documentDto; } diff --git a/chat-server/src/main/java/com/bjtds/brichat/service/dify/impl/DifyDatasetApiServiceImpl.java b/chat-server/src/main/java/com/bjtds/brichat/service/dify/impl/DifyDatasetApiServiceImpl.java index 309979f..fe478c8 100644 --- a/chat-server/src/main/java/com/bjtds/brichat/service/dify/impl/DifyDatasetApiServiceImpl.java +++ b/chat-server/src/main/java/com/bjtds/brichat/service/dify/impl/DifyDatasetApiServiceImpl.java @@ -7,6 +7,8 @@ import com.bjtds.brichat.entity.dify.*; import com.bjtds.brichat.entity.dto.PdfConversionResponse; import com.bjtds.brichat.entity.dto.PdfTaskDto; import com.bjtds.brichat.mapper.postgresql.DifyDatasetsMapper; +import com.bjtds.brichat.mapper.postgresql.DifyUploadFileMapper; +import com.bjtds.brichat.service.DatasetsDocService; import com.bjtds.brichat.service.dify.DifyDatasetApiService; import com.bjtds.brichat.util.Constants; import com.fasterxml.jackson.core.JsonProcessingException; @@ -86,9 +88,18 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService { @Value("${pdf.conversion.service.max-workers}") private Integer pdfConversionMaxWorkers; + @Value("${bjtds.difyDocPreview}") + private String difyDocPreview; + @Resource private DifyDatasetsMapper difyDatasetsMapper; + @Resource + private DifyUploadFileMapper difyUploadFileMapper; + + @Resource + private DatasetsDocService datasetsDocService; + @Override public ResponseEntity addMetadata(String datasetId, DifyMetadata metadata) { @@ -423,10 +434,15 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService { Map document = ( Map ) exchange.getBody().get("document"); String documentId = document.get("id"); - UploadFileInfoResponse uploadFileInfoResponse = difyDatasetService.uploadFileInfo(request.getDatasetId(), documentId); - String urls = uploadFileInfoResponse.getUrl(); - String fullUrl = difyUrl + urls; - setSourceUrlMatedata(request.getDatasetId(),documentId,fullUrl); + String uploadId = datasetsDocService.getUploadById(documentId); + DifyUploadFile uploadInfo = difyUploadFileMapper.getFileById(uploadId); + + + int lastIndex = uploadInfo.getKey().lastIndexOf('/'); + String fileName = uploadInfo.getKey().substring(lastIndex + 1); + String sourceUrl = difyDocPreview +"/" +fileName; + + setSourceUrlMatedata(request.getDatasetId(),documentId,sourceUrl); return exchange; } catch (Exception e) { diff --git a/chat-server/src/main/java/com/bjtds/brichat/service/impl/KnowledgeBaseServiceImpl.java b/chat-server/src/main/java/com/bjtds/brichat/service/impl/KnowledgeBaseServiceImpl.java index 447f7e6..bf3d7a9 100644 --- a/chat-server/src/main/java/com/bjtds/brichat/service/impl/KnowledgeBaseServiceImpl.java +++ b/chat-server/src/main/java/com/bjtds/brichat/service/impl/KnowledgeBaseServiceImpl.java @@ -1,7 +1,7 @@ -// src/main/java/com/bjtds/brichat/service/impl/KnowledgeBaseServiceImpl.java + package com.bjtds.brichat.service.impl; -import com.bjtds.brichat.entity.dataset.AppDataset; + import com.bjtds.brichat.entity.dataset.TUserDataset; import com.bjtds.brichat.entity.dataset.WorkflowDatasetDto; import com.bjtds.brichat.entity.dify.Workflow; @@ -22,10 +22,12 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.UUID; +import java.util.*; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.stream.Collectors; +import javax.annotation.PreDestroy; @Slf4j @Service @@ -53,23 +55,67 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService { private final ObjectMapper objectMapper = new ObjectMapper(); + // 创建线程池用于并行查询 + private final ExecutorService executorService = Executors.newFixedThreadPool(10); + @Override public List retrieval(KnowledgeBaseDto knowledgeBaseDto) throws Exception { String datasetPath = difyUrl + Constants.DATABASE_API; List datasetIds = difyDatasetsMapper.getDatasetIds(); - List recordDtos = Lists.newArrayList(); log.info("datasetPath:{}", datasetPath); log.info("apiKey:{}", apiKey); + log.info("开始并行查询 {} 个数据集", datasetIds.size()); - //同步查询,我需要异步 - for (String datasetId : datasetIds) { - List recordDtoList = RetrievalUtil.getRetrieval(datasetPath, apiKey, datasetId, knowledgeBaseDto); - if (recordDtoList != null && !recordDtoList.isEmpty()) { - // 添加判空条件 - recordDtos.addAll(recordDtoList); + // 使用 CompletableFuture 并行查询多个数据集 + List>> futures = datasetIds.stream() + .map(datasetId -> CompletableFuture.>supplyAsync(() -> { + try { + log.debug("开始查询数据集: {}", datasetId); + List result = RetrievalUtil.getRetrieval(datasetPath, apiKey, datasetId, knowledgeBaseDto); + log.debug("完成查询数据集: {}, 结果数量: {}", datasetId, result != null ? result.size() : 0); + return result; + } catch (Exception e) { + log.error("查询数据集 {} 时发生错误", datasetId, e); + return new ArrayList(); // 返回空列表而不是抛出异常 + } + }, executorService)) + .collect(Collectors.toList()); + + + // 等待所有查询完成并合并结果 + List recordDtos = Lists.newArrayList(); + try { + CompletableFuture allFutures = CompletableFuture.allOf( + futures.toArray(new CompletableFuture[0])); + + allFutures.get(); // 等待所有任务完成 + + // 收集所有结果 + for (CompletableFuture> future : futures) { + List recordDtoList = future.get(); + if (recordDtoList != null && !recordDtoList.isEmpty()) { + recordDtos.addAll(recordDtoList); + } } + + log.info("并行查询完成,总共获取到 {} 条记录", recordDtos.size()); + } catch (Exception e) { + log.error("并行查询过程中发生错误", e); + throw new Exception("并行查询失败: " + e.getMessage()); } + + log.info("按照置信度排序完成"); + // 假设 RecordDto 有 getScore() 方法 + recordDtos.sort((dto1, dto2) -> { + try { + double score1 = Double.parseDouble(dto1.getScore()); + double score2 = Double.parseDouble(dto2.getScore()); + return Double.compare(score2, score1); + } catch (NumberFormatException e) { + return 0; + } + }); return recordDtos; } @@ -326,4 +372,26 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService { public List getAllUserDatasets() throws Exception { return tUserDatasetMapper.getAllUserDatasets(); } + + /** + * 在 Bean 销毁时关闭线程池,释放资源 + */ + @PreDestroy + public void destroy() { + if (executorService != null && !executorService.isShutdown()) { + log.info("正在关闭知识库查询线程池..."); + executorService.shutdown(); + try { + if (!executorService.awaitTermination(60, java.util.concurrent.TimeUnit.SECONDS)) { + executorService.shutdownNow(); + log.warn("线程池未能在60秒内正常关闭,强制关闭"); + } + } catch (InterruptedException e) { + executorService.shutdownNow(); + Thread.currentThread().interrupt(); + log.error("等待线程池关闭时被中断", e); + } + log.info("知识库查询线程池已关闭"); + } + } } \ No newline at end of file diff --git a/chat-server/src/main/java/com/bjtds/brichat/util/RetrievalUtil.java b/chat-server/src/main/java/com/bjtds/brichat/util/RetrievalUtil.java index 03e9973..2821fcb 100644 --- a/chat-server/src/main/java/com/bjtds/brichat/util/RetrievalUtil.java +++ b/chat-server/src/main/java/com/bjtds/brichat/util/RetrievalUtil.java @@ -18,11 +18,10 @@ import java.util.List; public class RetrievalUtil { - public static List getRetrieval(String datasetPath, String apiKey, String datasetId, KnowledgeBaseDto knowledgeBaseDto) throws Exception { +public static List getRetrieval(String datasetPath, String apiKey, String datasetId, KnowledgeBaseDto knowledgeBaseDto) throws Exception { //创建httpclient对象 CloseableHttpClient httpClient = HttpClients.createDefault(); -// String uri = "http://192.168.1.211/v1/datasets/b9b2322d-0bd9-4a7e-b6a7-14d7d8778bae/retrieve"; String uri = datasetPath +"/"+ datasetId + "/retrieve"; log.info("uri:" + uri); //创建请求对象 @@ -33,47 +32,62 @@ public class RetrievalUtil { //组装requestbody //reranking_model JSONObject reranking_model = new JSONObject(); - reranking_model.put("reranking_provider_name", ""); - reranking_model.put("reranking_model_name", ""); + reranking_model.put("reranking_provider_name", "langgenius/huggingface_tei/huggingface_tei"); + reranking_model.put("reranking_model_name", "bge-reanker-v2-m3"); //retrieval_model JSONObject retrieval_model = new JSONObject(); retrieval_model.put("search_method", knowledgeBaseDto.getSearchMethod()); - retrieval_model.put("reranking_enable", false); - retrieval_model.put("reranking_mode", null); + retrieval_model.put("reranking_enable", true); retrieval_model.put("reranking_model", reranking_model); retrieval_model.put("weights", null); - retrieval_model.put("top_k", 1); - retrieval_model.put("score_threshold_enabled", false); - retrieval_model.put("score_threshold", null); + retrieval_model.put("top_k", 10); + retrieval_model.put("score_threshold_enabled", true); + retrieval_model.put("score_threshold", 0.5); //构造请求体 JSONObject jsonObject = new JSONObject(); - jsonObject.put("query", knowledgeBaseDto.getQuery()); + String query = knowledgeBaseDto.getQuery(); + log.info("原始查询参数: {}", query); + jsonObject.put("query", query); jsonObject.put("retrieval_model", retrieval_model); - StringEntity entity = new StringEntity(jsonObject.toString()); - //指定请求编码方式 - entity.setContentEncoding("utf-8"); + String requestBody = jsonObject.toString(); + log.info("请求体内容: {}", requestBody); + + StringEntity entity = new StringEntity(requestBody, "UTF-8"); //指定数据格式 - entity.setContentType("application/json"); + entity.setContentType("application/json; charset=UTF-8"); httpPost.setEntity(entity); - //发送请求 - CloseableHttpResponse response = httpClient.execute(httpPost); + //发送请求 + CloseableHttpResponse response = httpClient.execute(httpPost); - //解析返回结果 - int code = response.getStatusLine().getStatusCode(); - System.out.println(code); + //解析返回结果 + int code = response.getStatusLine().getStatusCode(); + System.out.println(code); - HttpEntity resultEntity = response.getEntity(); - String stringEntity = EntityUtils.toString(resultEntity); - System.out.println(stringEntity); + HttpEntity resultEntity = response.getEntity(); + String stringEntity = EntityUtils.toString(resultEntity); + // log.info("API返回的原始JSON数据: {}", stringEntity); - JSONObject jsonResult = JSON.parseObject(stringEntity); - List recordDtoList = (List) jsonResult.get("records"); - //关闭资源 - response.close(); - httpClient.close(); - return recordDtoList; + JSONObject jsonResult = JSON.parseObject(stringEntity); + //log.info("解析后的records字段: {}", jsonResult.getString("records")); + + // 正确地将 JSON 数组转换为 RecordDto 列表 + List recordDtoList = JSON.parseArray(jsonResult.getJSONArray("records").toJSONString(), RecordDto.class); + + // 检查解析后的数据 + if (recordDtoList != null && !recordDtoList.isEmpty()) { + RecordDto firstRecord = recordDtoList.get(0); + //log.info("第一条记录的segmentDto是否为null: {}", firstRecord.getSegmentDto() == null); + if (firstRecord.getSegmentDto() != null) { + //log.info("第一条记录的segmentDto内容: {}", firstRecord.getSegmentDto()); + } + // log.info("第一条记录的完整内容: {}", firstRecord); } + //关闭资源 + response.close(); + httpClient.close(); + return recordDtoList; +} } diff --git a/chat-server/src/main/resources/application-beijing.yml b/chat-server/src/main/resources/application-beijing.yml index 7ab0a1a..027fefa 100644 --- a/chat-server/src/main/resources/application-beijing.yml +++ b/chat-server/src/main/resources/application-beijing.yml @@ -41,6 +41,7 @@ bjtds: pdfFilePath: ${pdf-file-path:D:\bjtds\pdfFile\} difyUploadPath: ${dify-upload-path:D:\bjtds\difyUploadFile\} pdfFileTempPath: ${pdf-file-temp-path:D:\bjtds\pdfTempFile\pdfFile\} + difyDocPreview: ${dify-doc-preview:http://192.168.1.211/dify-doc-preview} dify: url: ${dify-url:http://192.168.1.211:16780} # 北京环境 Dify 服务地址 diff --git a/chat-server/src/main/resources/application-wuhan.yml b/chat-server/src/main/resources/application-wuhan.yml index 10b5980..91df9fe 100644 --- a/chat-server/src/main/resources/application-wuhan.yml +++ b/chat-server/src/main/resources/application-wuhan.yml @@ -41,6 +41,8 @@ bjtds: pdfFilePath: ${pdf-file-path:D:\bjtds\pdfFile\} difyUploadPath: ${dify-upload-path:D:\bjtds\difyUploadFile\} pdfFileTempPath: ${pdf-file-temp-path:D:\bjtds\pdfTempFile\pdfFile\} + difyDocPreview: ${dify-doc-preview:http://192.168.8.253/dify-doc-preview} + dify: url: ${dify-url:http://192.168.8.253:16780} # 武汉环境 Dify 服务地址