feat: 新增智能检索,全库关键字检索及文档预览和下载

This commit is contained in:
wenjinbo 2025-08-26 16:17:17 +08:00
parent 5faa1aa59b
commit 93a8fd5dc1
13 changed files with 1428 additions and 70 deletions

View File

@ -148,4 +148,12 @@ export const deleteDataset = (id: string) => {
url: '/brichat-service/datasetManage/document/deepAnalysisList',
method: 'get'
})
}
}
/**
 * Full-library retrieval: searches every knowledge base for the given query.
 * @param data.query        the keyword / question text to search for
 * @param data.searchMethod retrieval mode passed through to the backend
 */
export function retrieval(data: { query: string; searchMethod: string }) {
  return request({
    method: 'post',
    url: '/brichat-service/knowledge-base/retrieval',
    data
  })
}

View File

@ -17,7 +17,7 @@ module.exports = {
// 配后端数据的接收方式application/json;charset=UTF-8 或 application/x-www-form-urlencoded;charset=UTF-8
contentType: 'application/json;charset=UTF-8',
// 最长请求时间
requestTimeout: 10000,
requestTimeout: 20000,
// 操作正常code支持String、Array、int多种类型
successCode: [200, 0, '200', '0'],
// 数据状态的字段名称

File diff suppressed because it is too large Load Diff

View File

@ -29,6 +29,8 @@ public class KnowledgeBaseController {
/**
 * Full-library retrieval endpoint: forwards the query to the knowledge-base
 * service, which fans it out across all datasets, and wraps the merged hit
 * list in the standard result envelope.
 *
 * NOTE(review): `throws Exception` is very broad — consider narrowing to the
 * actual failure types (or a domain exception) so callers/advice can react.
 */
@PostMapping("/retrieval")
public ResultUtils retrieval(@RequestBody KnowledgeBaseDto knowledgeBaseDto) throws Exception{
List<RecordDto> retrievalResult = knowledgeBaseService.retrieval(knowledgeBaseDto);
return ResultUtils.success(retrievalResult);
}

View File

@ -1,17 +1,17 @@
package com.bjtds.brichat.entity.dto;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.alibaba.fastjson.annotation.JSONField;
import lombok.Data;
@Data
// DTO mirroring the "document" object inside a Dify retrieval record.
// Field names are mapped from the API's snake_case JSON keys.
//
// NOTE(review): this span is a rendered diff — each field shows both the
// removed Jackson @JsonProperty line and the added fastjson @JSONField line.
// After the commit only the @JSONField annotations remain (fastjson is what
// RetrievalUtil uses to parse the response).
public class DocumentDto {
private String id;
@JsonProperty("data_source_type")
@JSONField(name = "data_source_type")
private String dataSourceType;
private String name;
@JsonProperty("doc_type")
@JSONField(name = "doc_type")
private String docType;
@JsonProperty("doc_metadata")
@JSONField(name = "doc_metadata")
private String docMetadata;
}

View File

@ -7,7 +7,7 @@ import lombok.Data;
// Request body for the full-library retrieval endpoint.
// NOTE(review): unlike the response DTOs (migrated to fastjson @JSONField in
// this commit), this class keeps Jackson's @JsonProperty — presumably because
// Spring MVC deserializes the incoming @RequestBody with Jackson; confirm.
public class KnowledgeBaseDto {
// the keyword / question text to search for
private String query;
// retrieval mode forwarded to Dify as "search_method" (TODO confirm the
// accepted values, e.g. semantic_search / keyword_search / hybrid_search)
@JsonProperty("search_method")
private String searchMethod;
}

View File

@ -1,15 +1,17 @@
package com.bjtds.brichat.entity.dto;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.alibaba.fastjson.annotation.JSONField;
import lombok.Data;
@Data
// One retrieval hit ("record") returned by the Dify retrieve API.
//
// NOTE(review): rendered diff — fields show both the removed @JsonProperty
// and the added @JSONField annotation; only @JSONField survives the commit.
public class RecordDto {
/** Segment information (the matched chunk and its metadata). */
@JSONField(name = "segment")
private SegmentDto segmentDto;
@JsonProperty("child_chunks")
@JSONField(name = "child_chunks")
private String childChunks;
/** Confidence score, e.g. "0.99"; parsed as double for sorting upstream. */
private String score;
@JsonProperty("tsne_position")
@JSONField(name = "tsne_position")
private String tsnePosition;
}

View File

@ -1,6 +1,6 @@
package com.bjtds.brichat.entity.dto;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.alibaba.fastjson.annotation.JSONField;
import lombok.Data;
import java.util.List;
@ -10,39 +10,41 @@ public class SegmentDto {
private String id;
private Integer position;
@JsonProperty("document_id")
@JSONField(name = "document_id")
private String documentId;
private String content;
@JsonProperty("sign_content")
@JSONField(name = "sign_content")
private String signContent;
private String answer;
@JsonProperty("word_count")
@JSONField(name = "word_count")
private Integer wordCount;
private Integer tokens;
private List<String> keywords;
@JsonProperty("index_node_id")
@JSONField(name = "index_node_id")
private String indexNodeId;
@JsonProperty("index_node_hash")
@JSONField(name = "index_node_hash")
private String indexNodeHash;
@JsonProperty("hit_count")
@JSONField(name = "hit_count")
private Integer hitCount;
private Boolean enabled;
@JsonProperty("disabled_at")
@JSONField(name = "disabled_at")
private String disabledAt;
@JsonProperty("disabled_by")
@JSONField(name = "disabled_by")
private String disabledBy;
private String status;
@JsonProperty("created_at")
@JSONField(name = "created_at")
private String createdAt;
@JsonProperty("created_by")
@JSONField(name = "created_by")
private String createdBy;
@JsonProperty("indexing_at")
@JSONField(name = "indexing_at")
private String indexingAt;
@JsonProperty("completed_at")
@JSONField(name = "completed_at")
private String completedAt;
private String error;
@JsonProperty("stopped_at")
@JSONField(name = "stopped_at")
private String stoppedAt;
@JSONField(name = "document")
private DocumentDto documentDto;
}

View File

@ -7,6 +7,8 @@ import com.bjtds.brichat.entity.dify.*;
import com.bjtds.brichat.entity.dto.PdfConversionResponse;
import com.bjtds.brichat.entity.dto.PdfTaskDto;
import com.bjtds.brichat.mapper.postgresql.DifyDatasetsMapper;
import com.bjtds.brichat.mapper.postgresql.DifyUploadFileMapper;
import com.bjtds.brichat.service.DatasetsDocService;
import com.bjtds.brichat.service.dify.DifyDatasetApiService;
import com.bjtds.brichat.util.Constants;
import com.fasterxml.jackson.core.JsonProcessingException;
@ -86,9 +88,18 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
@Value("${pdf.conversion.service.max-workers}")
private Integer pdfConversionMaxWorkers;
@Value("${bjtds.difyDocPreview}")
private String difyDocPreview;
@Resource
private DifyDatasetsMapper difyDatasetsMapper;
@Resource
private DifyUploadFileMapper difyUploadFileMapper;
@Resource
private DatasetsDocService datasetsDocService;
@Override
public ResponseEntity<DifyMetadata> addMetadata(String datasetId, DifyMetadata metadata) {
@ -423,10 +434,15 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
Map<String,String> document = ( Map<String,String> ) exchange.getBody().get("document");
String documentId = document.get("id");
UploadFileInfoResponse uploadFileInfoResponse = difyDatasetService.uploadFileInfo(request.getDatasetId(), documentId);
String urls = uploadFileInfoResponse.getUrl();
String fullUrl = difyUrl + urls;
setSourceUrlMatedata(request.getDatasetId(),documentId,fullUrl);
String uploadId = datasetsDocService.getUploadById(documentId);
DifyUploadFile uploadInfo = difyUploadFileMapper.getFileById(uploadId);
int lastIndex = uploadInfo.getKey().lastIndexOf('/');
String fileName = uploadInfo.getKey().substring(lastIndex + 1);
String sourceUrl = difyDocPreview +"/" +fileName;
setSourceUrlMatedata(request.getDatasetId(),documentId,sourceUrl);
return exchange;
} catch (Exception e) {

View File

@ -1,7 +1,7 @@
// src/main/java/com/bjtds/brichat/service/impl/KnowledgeBaseServiceImpl.java
package com.bjtds.brichat.service.impl;
import com.bjtds.brichat.entity.dataset.AppDataset;
import com.bjtds.brichat.entity.dataset.TUserDataset;
import com.bjtds.brichat.entity.dataset.WorkflowDatasetDto;
import com.bjtds.brichat.entity.dify.Workflow;
@ -22,10 +22,12 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.UUID;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import javax.annotation.PreDestroy;
@Slf4j
@Service
@ -53,23 +55,67 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {
private final ObjectMapper objectMapper = new ObjectMapper();
// 创建线程池用于并行查询
private final ExecutorService executorService = Executors.newFixedThreadPool(10);
@Override
// Full-library retrieval: fans the query out to every dataset in parallel,
// merges the per-dataset hits, and returns them sorted by score descending.
//
// NOTE(review): this span is a rendered diff — the removed sequential
// for-loop and the added CompletableFuture version are interleaved below;
// only the CompletableFuture version exists after the commit.
public List<RecordDto> retrieval(KnowledgeBaseDto knowledgeBaseDto) throws Exception {
String datasetPath = difyUrl + Constants.DATABASE_API;
List<String> datasetIds = difyDatasetsMapper.getDatasetIds();
List<RecordDto> recordDtos = Lists.newArrayList();
log.info("datasetPath:{}", datasetPath);
// NOTE(review): logging the API key at INFO level leaks a credential into
// the logs — remove this line or mask the value.
log.info("apiKey:{}", apiKey);
log.info("开始并行查询 {} 个数据集", datasetIds.size());
// was synchronous — needed to be async (leftover pre-commit loop follows;
// note its braces are unbalanced in this diff rendering)
for (String datasetId : datasetIds) {
List<RecordDto> recordDtoList = RetrievalUtil.getRetrieval(datasetPath, apiKey, datasetId, knowledgeBaseDto);
if (recordDtoList != null && !recordDtoList.isEmpty()) {
// null/empty guard
recordDtos.addAll(recordDtoList);
// query all datasets in parallel with CompletableFuture
List<CompletableFuture<List<RecordDto>>> futures = datasetIds.stream()
.map(datasetId -> CompletableFuture.<List<RecordDto>>supplyAsync(() -> {
try {
log.debug("开始查询数据集: {}", datasetId);
List<RecordDto> result = RetrievalUtil.getRetrieval(datasetPath, apiKey, datasetId, knowledgeBaseDto);
log.debug("完成查询数据集: {}, 结果数量: {}", datasetId, result != null ? result.size() : 0);
return result;
} catch (Exception e) {
log.error("查询数据集 {} 时发生错误", datasetId, e);
return new ArrayList<RecordDto>(); // return an empty list instead of failing the whole fan-out
}
}, executorService))
.collect(Collectors.toList());
// wait for all queries to finish, then merge the results
// NOTE(review): in the literal text this re-declares recordDtos (declared
// above in the removed code) — a diff artifact; post-commit only this
// declaration exists.
List<RecordDto> recordDtos = Lists.newArrayList();
try {
CompletableFuture<Void> allFutures = CompletableFuture.allOf(
futures.toArray(new CompletableFuture[0]));
allFutures.get(); // block until every task has completed
// collect all results
for (CompletableFuture<List<RecordDto>> future : futures) {
List<RecordDto> recordDtoList = future.get();
if (recordDtoList != null && !recordDtoList.isEmpty()) {
recordDtos.addAll(recordDtoList);
}
}
log.info("并行查询完成,总共获取到 {} 条记录", recordDtos.size());
} catch (Exception e) {
log.error("并行查询过程中发生错误", e);
// NOTE(review): wrapping loses the cause and throws raw Exception —
// prefer `throw new Exception("并行查询失败: " + e.getMessage(), e)` or a
// domain-specific exception type.
throw new Exception("并行查询失败: " + e.getMessage());
}
log.info("按照置信度排序完成");
// sort by RecordDto.getScore(), highest confidence first; unparseable
// scores are treated as equal (comparator returns 0)
recordDtos.sort((dto1, dto2) -> {
try {
double score1 = Double.parseDouble(dto1.getScore());
double score2 = Double.parseDouble(dto2.getScore());
return Double.compare(score2, score1);
} catch (NumberFormatException e) {
return 0;
}
});
return recordDtos;
}
@ -326,4 +372,26 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {
/**
 * Returns every user-dataset association row via the mapper.
 * NOTE(review): `throws Exception` is broader than the mapper call warrants.
 */
public List<TUserDataset> getAllUserDatasets() throws Exception {
return tUserDatasetMapper.getAllUserDatasets();
}
/**
 * Shuts down the retrieval thread pool when this bean is destroyed,
 * releasing its worker threads.
 */
@PreDestroy
public void destroy() {
if (executorService != null && !executorService.isShutdown()) {
log.info("正在关闭知识库查询线程池...");
// stop accepting new tasks; in-flight queries get 60s to drain below
executorService.shutdown();
try {
if (!executorService.awaitTermination(60, java.util.concurrent.TimeUnit.SECONDS)) {
// grace period elapsed — interrupt remaining tasks
executorService.shutdownNow();
log.warn("线程池未能在60秒内正常关闭强制关闭");
}
} catch (InterruptedException e) {
executorService.shutdownNow();
// restore the interrupt flag for callers further up the stack
Thread.currentThread().interrupt();
log.error("等待线程池关闭时被中断", e);
}
log.info("知识库查询线程池已关闭");
}
}
}

View File

@ -18,11 +18,10 @@ import java.util.List;
public class RetrievalUtil {
public static List<RecordDto> getRetrieval(String datasetPath, String apiKey, String datasetId, KnowledgeBaseDto knowledgeBaseDto) throws Exception {
public static List<RecordDto> getRetrieval(String datasetPath, String apiKey, String datasetId, KnowledgeBaseDto knowledgeBaseDto) throws Exception {
//创建httpclient对象
CloseableHttpClient httpClient = HttpClients.createDefault();
// String uri = "http://192.168.1.211/v1/datasets/b9b2322d-0bd9-4a7e-b6a7-14d7d8778bae/retrieve";
String uri = datasetPath +"/"+ datasetId + "/retrieve";
log.info("uri:" + uri);
//创建请求对象
@ -33,47 +32,62 @@ public class RetrievalUtil {
//组装requestbody
//reranking_model
JSONObject reranking_model = new JSONObject();
reranking_model.put("reranking_provider_name", "");
reranking_model.put("reranking_model_name", "");
reranking_model.put("reranking_provider_name", "langgenius/huggingface_tei/huggingface_tei");
reranking_model.put("reranking_model_name", "bge-reanker-v2-m3");
//retrieval_model
JSONObject retrieval_model = new JSONObject();
retrieval_model.put("search_method", knowledgeBaseDto.getSearchMethod());
retrieval_model.put("reranking_enable", false);
retrieval_model.put("reranking_mode", null);
retrieval_model.put("reranking_enable", true);
retrieval_model.put("reranking_model", reranking_model);
retrieval_model.put("weights", null);
retrieval_model.put("top_k", 1);
retrieval_model.put("score_threshold_enabled", false);
retrieval_model.put("score_threshold", null);
retrieval_model.put("top_k", 10);
retrieval_model.put("score_threshold_enabled", true);
retrieval_model.put("score_threshold", 0.5);
//构造请求体
JSONObject jsonObject = new JSONObject();
jsonObject.put("query", knowledgeBaseDto.getQuery());
String query = knowledgeBaseDto.getQuery();
log.info("原始查询参数: {}", query);
jsonObject.put("query", query);
jsonObject.put("retrieval_model", retrieval_model);
StringEntity entity = new StringEntity(jsonObject.toString());
//指定请求编码方式
entity.setContentEncoding("utf-8");
String requestBody = jsonObject.toString();
log.info("请求体内容: {}", requestBody);
StringEntity entity = new StringEntity(requestBody, "UTF-8");
//指定数据格式
entity.setContentType("application/json");
entity.setContentType("application/json; charset=UTF-8");
httpPost.setEntity(entity);
//发送请求
CloseableHttpResponse response = httpClient.execute(httpPost);
//发送请求
CloseableHttpResponse response = httpClient.execute(httpPost);
//解析返回结果
int code = response.getStatusLine().getStatusCode();
System.out.println(code);
//解析返回结果
int code = response.getStatusLine().getStatusCode();
System.out.println(code);
HttpEntity resultEntity = response.getEntity();
String stringEntity = EntityUtils.toString(resultEntity);
System.out.println(stringEntity);
HttpEntity resultEntity = response.getEntity();
String stringEntity = EntityUtils.toString(resultEntity);
// log.info("API返回的原始JSON数据: {}", stringEntity);
JSONObject jsonResult = JSON.parseObject(stringEntity);
List<RecordDto> recordDtoList = (List<RecordDto>) jsonResult.get("records");
//关闭资源
response.close();
httpClient.close();
return recordDtoList;
JSONObject jsonResult = JSON.parseObject(stringEntity);
//log.info("解析后的records字段: {}", jsonResult.getString("records"));
// 正确地将 JSON 数组转换为 RecordDto 列表
List<RecordDto> recordDtoList = JSON.parseArray(jsonResult.getJSONArray("records").toJSONString(), RecordDto.class);
// 检查解析后的数据
if (recordDtoList != null && !recordDtoList.isEmpty()) {
RecordDto firstRecord = recordDtoList.get(0);
//log.info("第一条记录的segmentDto是否为null: {}", firstRecord.getSegmentDto() == null);
if (firstRecord.getSegmentDto() != null) {
//log.info("第一条记录的segmentDto内容: {}", firstRecord.getSegmentDto());
}
// log.info("第一条记录的完整内容: {}", firstRecord);
}
//关闭资源
response.close();
httpClient.close();
return recordDtoList;
}
}

View File

@ -41,6 +41,7 @@ bjtds:
pdfFilePath: ${pdf-file-path:D:\bjtds\pdfFile\}
difyUploadPath: ${dify-upload-path:D:\bjtds\difyUploadFile\}
pdfFileTempPath: ${pdf-file-temp-path:D:\bjtds\pdfTempFile\pdfFile\}
difyDocPreview: ${dify-doc-preview:http://192.168.1.211/dify-doc-preview}
dify:
url: ${dify-url:http://192.168.1.211:16780} # 北京环境 Dify 服务地址

View File

@ -41,6 +41,8 @@ bjtds:
pdfFilePath: ${pdf-file-path:D:\bjtds\pdfFile\}
difyUploadPath: ${dify-upload-path:D:\bjtds\difyUploadFile\}
pdfFileTempPath: ${pdf-file-temp-path:D:\bjtds\pdfTempFile\pdfFile\}
difyDocPreview: ${dify-doc-preview:http://192.168.8.253/dify-doc-preview}
dify:
url: ${dify-url:http://192.168.8.253:16780} # 武汉环境 Dify 服务地址