feat:新增智能检索,全库关键字检索及文档预览和下载

This commit is contained in:
wenjinbo 2025-08-26 16:17:17 +08:00
parent 5faa1aa59b
commit 93a8fd5dc1
13 changed files with 1428 additions and 70 deletions

View File

@ -148,4 +148,12 @@ export const deleteDataset = (id: string) => {
url: '/brichat-service/datasetManage/document/deepAnalysisList', url: '/brichat-service/datasetManage/document/deepAnalysisList',
method: 'get' method: 'get'
}) })
} }
/**
 * Posts a retrieval query to the knowledge-base retrieval endpoint.
 *
 * @param data - Request payload: `query` is the text to search for and
 *   `searchMethod` is the search method forwarded to the backend.
 * @returns The value returned by `request` for this POST call.
 */
export function retrieval(data: { query: string; searchMethod: string }) {
  return request({
    url: '/brichat-service/knowledge-base/retrieval',
    method: 'post',
    data
  })
}

View File

@ -17,7 +17,7 @@ module.exports = {
// 配后端数据的接收方式application/json;charset=UTF-8 或 application/x-www-form-urlencoded;charset=UTF-8 // 配后端数据的接收方式application/json;charset=UTF-8 或 application/x-www-form-urlencoded;charset=UTF-8
contentType: 'application/json;charset=UTF-8', contentType: 'application/json;charset=UTF-8',
// 最长请求时间 // 最长请求时间
requestTimeout: 10000, requestTimeout: 20000,
// 操作正常code支持String、Array、int多种类型 // 操作正常code支持String、Array、int多种类型
successCode: [200, 0, '200', '0'], successCode: [200, 0, '200', '0'],
// 数据状态的字段名称 // 数据状态的字段名称

File diff suppressed because it is too large Load Diff

View File

@ -29,6 +29,8 @@ public class KnowledgeBaseController {
@PostMapping("/retrieval") @PostMapping("/retrieval")
public ResultUtils retrieval(@RequestBody KnowledgeBaseDto knowledgeBaseDto) throws Exception{ public ResultUtils retrieval(@RequestBody KnowledgeBaseDto knowledgeBaseDto) throws Exception{
List<RecordDto> retrievalResult = knowledgeBaseService.retrieval(knowledgeBaseDto); List<RecordDto> retrievalResult = knowledgeBaseService.retrieval(knowledgeBaseDto);
return ResultUtils.success(retrievalResult); return ResultUtils.success(retrievalResult);
} }

View File

@ -1,17 +1,17 @@
package com.bjtds.brichat.entity.dto; package com.bjtds.brichat.entity.dto;
import com.fasterxml.jackson.annotation.JsonProperty; import com.alibaba.fastjson.annotation.JSONField;
import lombok.Data; import lombok.Data;
@Data @Data
public class DocumentDto { public class DocumentDto {
private String id; private String id;
@JsonProperty("data_source_type") @JSONField(name = "data_source_type")
private String dataSourceType; private String dataSourceType;
private String name; private String name;
@JsonProperty("doc_type") @JSONField(name = "doc_type")
private String docType; private String docType;
@JsonProperty("doc_metadata") @JSONField(name = "doc_metadata")
private String docMetadata; private String docMetadata;
} }

View File

@ -7,7 +7,7 @@ import lombok.Data;
public class KnowledgeBaseDto { public class KnowledgeBaseDto {
private String query; private String query;
@JsonProperty("search_method")
private String searchMethod; private String searchMethod;
} }

View File

@ -1,15 +1,17 @@
package com.bjtds.brichat.entity.dto; package com.bjtds.brichat.entity.dto;
import com.fasterxml.jackson.annotation.JsonProperty; import com.alibaba.fastjson.annotation.JSONField;
import lombok.Data; import lombok.Data;
@Data @Data
public class RecordDto { public class RecordDto {
/**分段信息*/
@JSONField(name = "segment")
private SegmentDto segmentDto; private SegmentDto segmentDto;
@JsonProperty("child_chunks") @JSONField(name = "child_chunks")
private String childChunks; private String childChunks;
/** Confidence score, e.g. 0.99 */
private String score; private String score;
@JsonProperty("tsne_position") @JSONField(name = "tsne_position")
private String tsnePosition; private String tsnePosition;
} }

View File

@ -1,6 +1,6 @@
package com.bjtds.brichat.entity.dto; package com.bjtds.brichat.entity.dto;
import com.fasterxml.jackson.annotation.JsonProperty; import com.alibaba.fastjson.annotation.JSONField;
import lombok.Data; import lombok.Data;
import java.util.List; import java.util.List;
@ -10,39 +10,41 @@ public class SegmentDto {
private String id; private String id;
private Integer position; private Integer position;
@JsonProperty("document_id") @JSONField(name = "document_id")
private String documentId; private String documentId;
private String content; private String content;
@JsonProperty("sign_content") @JSONField(name = "sign_content")
private String signContent; private String signContent;
private String answer; private String answer;
@JsonProperty("word_count") @JSONField(name = "word_count")
private Integer wordCount; private Integer wordCount;
private Integer tokens; private Integer tokens;
private List<String> keywords; private List<String> keywords;
@JsonProperty("index_node_id") @JSONField(name = "index_node_id")
private String indexNodeId; private String indexNodeId;
@JsonProperty("index_node_hash") @JSONField(name = "index_node_hash")
private String indexNodeHash; private String indexNodeHash;
@JsonProperty("hit_count") @JSONField(name = "hit_count")
private Integer hitCount; private Integer hitCount;
private Boolean enabled; private Boolean enabled;
@JsonProperty("disabled_at") @JSONField(name = "disabled_at")
private String disabledAt; private String disabledAt;
@JsonProperty("disabled_by") @JSONField(name = "disabled_by")
private String disabledBy; private String disabledBy;
private String status; private String status;
@JsonProperty("created_at") @JSONField(name = "created_at")
private String createdAt; private String createdAt;
@JsonProperty("created_by") @JSONField(name = "created_by")
private String createdBy; private String createdBy;
@JsonProperty("indexing_at") @JSONField(name = "indexing_at")
private String indexingAt; private String indexingAt;
@JsonProperty("completed_at") @JSONField(name = "completed_at")
private String completedAt; private String completedAt;
private String error; private String error;
@JsonProperty("stopped_at") @JSONField(name = "stopped_at")
private String stoppedAt; private String stoppedAt;
@JSONField(name = "document")
private DocumentDto documentDto; private DocumentDto documentDto;
} }

View File

@ -7,6 +7,8 @@ import com.bjtds.brichat.entity.dify.*;
import com.bjtds.brichat.entity.dto.PdfConversionResponse; import com.bjtds.brichat.entity.dto.PdfConversionResponse;
import com.bjtds.brichat.entity.dto.PdfTaskDto; import com.bjtds.brichat.entity.dto.PdfTaskDto;
import com.bjtds.brichat.mapper.postgresql.DifyDatasetsMapper; import com.bjtds.brichat.mapper.postgresql.DifyDatasetsMapper;
import com.bjtds.brichat.mapper.postgresql.DifyUploadFileMapper;
import com.bjtds.brichat.service.DatasetsDocService;
import com.bjtds.brichat.service.dify.DifyDatasetApiService; import com.bjtds.brichat.service.dify.DifyDatasetApiService;
import com.bjtds.brichat.util.Constants; import com.bjtds.brichat.util.Constants;
import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.JsonProcessingException;
@ -86,9 +88,18 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
@Value("${pdf.conversion.service.max-workers}") @Value("${pdf.conversion.service.max-workers}")
private Integer pdfConversionMaxWorkers; private Integer pdfConversionMaxWorkers;
@Value("${bjtds.difyDocPreview}")
private String difyDocPreview;
@Resource @Resource
private DifyDatasetsMapper difyDatasetsMapper; private DifyDatasetsMapper difyDatasetsMapper;
@Resource
private DifyUploadFileMapper difyUploadFileMapper;
@Resource
private DatasetsDocService datasetsDocService;
@Override @Override
public ResponseEntity<DifyMetadata> addMetadata(String datasetId, DifyMetadata metadata) { public ResponseEntity<DifyMetadata> addMetadata(String datasetId, DifyMetadata metadata) {
@ -423,10 +434,15 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
Map<String,String> document = ( Map<String,String> ) exchange.getBody().get("document"); Map<String,String> document = ( Map<String,String> ) exchange.getBody().get("document");
String documentId = document.get("id"); String documentId = document.get("id");
UploadFileInfoResponse uploadFileInfoResponse = difyDatasetService.uploadFileInfo(request.getDatasetId(), documentId); String uploadId = datasetsDocService.getUploadById(documentId);
String urls = uploadFileInfoResponse.getUrl(); DifyUploadFile uploadInfo = difyUploadFileMapper.getFileById(uploadId);
String fullUrl = difyUrl + urls;
setSourceUrlMatedata(request.getDatasetId(),documentId,fullUrl);
int lastIndex = uploadInfo.getKey().lastIndexOf('/');
String fileName = uploadInfo.getKey().substring(lastIndex + 1);
String sourceUrl = difyDocPreview +"/" +fileName;
setSourceUrlMatedata(request.getDatasetId(),documentId,sourceUrl);
return exchange; return exchange;
} catch (Exception e) { } catch (Exception e) {

View File

@ -1,7 +1,7 @@
// src/main/java/com/bjtds/brichat/service/impl/KnowledgeBaseServiceImpl.java
package com.bjtds.brichat.service.impl; package com.bjtds.brichat.service.impl;
import com.bjtds.brichat.entity.dataset.AppDataset;
import com.bjtds.brichat.entity.dataset.TUserDataset; import com.bjtds.brichat.entity.dataset.TUserDataset;
import com.bjtds.brichat.entity.dataset.WorkflowDatasetDto; import com.bjtds.brichat.entity.dataset.WorkflowDatasetDto;
import com.bjtds.brichat.entity.dify.Workflow; import com.bjtds.brichat.entity.dify.Workflow;
@ -22,10 +22,12 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.ArrayList; import java.util.*;
import java.util.Collections; import java.util.concurrent.CompletableFuture;
import java.util.List; import java.util.concurrent.ExecutorService;
import java.util.UUID; import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import javax.annotation.PreDestroy;
@Slf4j @Slf4j
@Service @Service
@ -53,23 +55,67 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {
private final ObjectMapper objectMapper = new ObjectMapper(); private final ObjectMapper objectMapper = new ObjectMapper();
// 创建线程池用于并行查询
private final ExecutorService executorService = Executors.newFixedThreadPool(10);
@Override @Override
public List<RecordDto> retrieval(KnowledgeBaseDto knowledgeBaseDto) throws Exception { public List<RecordDto> retrieval(KnowledgeBaseDto knowledgeBaseDto) throws Exception {
String datasetPath = difyUrl + Constants.DATABASE_API; String datasetPath = difyUrl + Constants.DATABASE_API;
List<String> datasetIds = difyDatasetsMapper.getDatasetIds(); List<String> datasetIds = difyDatasetsMapper.getDatasetIds();
List<RecordDto> recordDtos = Lists.newArrayList();
log.info("datasetPath:{}", datasetPath); log.info("datasetPath:{}", datasetPath);
log.info("apiKey:{}", apiKey); log.info("apiKey:{}", apiKey);
log.info("开始并行查询 {} 个数据集", datasetIds.size());
//同步查询我需要异步 // 使用 CompletableFuture 并行查询多个数据集
for (String datasetId : datasetIds) { List<CompletableFuture<List<RecordDto>>> futures = datasetIds.stream()
List<RecordDto> recordDtoList = RetrievalUtil.getRetrieval(datasetPath, apiKey, datasetId, knowledgeBaseDto); .map(datasetId -> CompletableFuture.<List<RecordDto>>supplyAsync(() -> {
if (recordDtoList != null && !recordDtoList.isEmpty()) { try {
// 添加判空条件 log.debug("开始查询数据集: {}", datasetId);
recordDtos.addAll(recordDtoList); List<RecordDto> result = RetrievalUtil.getRetrieval(datasetPath, apiKey, datasetId, knowledgeBaseDto);
log.debug("完成查询数据集: {}, 结果数量: {}", datasetId, result != null ? result.size() : 0);
return result;
} catch (Exception e) {
log.error("查询数据集 {} 时发生错误", datasetId, e);
return new ArrayList<RecordDto>(); // 返回空列表而不是抛出异常
}
}, executorService))
.collect(Collectors.toList());
// 等待所有查询完成并合并结果
List<RecordDto> recordDtos = Lists.newArrayList();
try {
CompletableFuture<Void> allFutures = CompletableFuture.allOf(
futures.toArray(new CompletableFuture[0]));
allFutures.get(); // 等待所有任务完成
// 收集所有结果
for (CompletableFuture<List<RecordDto>> future : futures) {
List<RecordDto> recordDtoList = future.get();
if (recordDtoList != null && !recordDtoList.isEmpty()) {
recordDtos.addAll(recordDtoList);
}
} }
log.info("并行查询完成,总共获取到 {} 条记录", recordDtos.size());
} catch (Exception e) {
log.error("并行查询过程中发生错误", e);
throw new Exception("并行查询失败: " + e.getMessage());
} }
log.info("按照置信度排序完成");
// Sort by score in descending order (assumes RecordDto exposes getScore()).
recordDtos.sort((dto1, dto2) -> {
try {
double score1 = Double.parseDouble(dto1.getScore());
double score2 = Double.parseDouble(dto2.getScore());
return Double.compare(score2, score1);
} catch (NumberFormatException e) {
return 0;
}
});
return recordDtos; return recordDtos;
} }
@ -326,4 +372,26 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {
public List<TUserDataset> getAllUserDatasets() throws Exception { public List<TUserDataset> getAllUserDatasets() throws Exception {
return tUserDatasetMapper.getAllUserDatasets(); return tUserDatasetMapper.getAllUserDatasets();
} }
/**
 * Shuts down the knowledge-base query thread pool when this bean is destroyed.
 *
 * <p>Requests an orderly shutdown and waits up to 60 seconds for submitted tasks to finish.
 * If they do not finish in time, or the waiting thread is interrupted, the pool is shut down
 * forcibly. Does nothing when the pool is absent or a shutdown was already requested.
 */
@PreDestroy
public void destroy() {
    // Nothing to release if the pool is missing or already shutting down.
    if (executorService == null || executorService.isShutdown()) {
        return;
    }

    log.info("正在关闭知识库查询线程池...");
    executorService.shutdown();
    try {
        boolean terminated =
                executorService.awaitTermination(60, java.util.concurrent.TimeUnit.SECONDS);
        if (!terminated) {
            // Graceful shutdown timed out: cancel whatever is still running.
            executorService.shutdownNow();
            log.warn("线程池未能在60秒内正常关闭强制关闭");
        }
    } catch (InterruptedException interrupted) {
        executorService.shutdownNow();
        // Restore the interrupt flag so code further up the stack can observe it.
        Thread.currentThread().interrupt();
        log.error("等待线程池关闭时被中断", interrupted);
    }
    log.info("知识库查询线程池已关闭");
}
} }

View File

@ -18,11 +18,10 @@ import java.util.List;
public class RetrievalUtil { public class RetrievalUtil {
public static List<RecordDto> getRetrieval(String datasetPath, String apiKey, String datasetId, KnowledgeBaseDto knowledgeBaseDto) throws Exception { public static List<RecordDto> getRetrieval(String datasetPath, String apiKey, String datasetId, KnowledgeBaseDto knowledgeBaseDto) throws Exception {
//创建httpclient对象 //创建httpclient对象
CloseableHttpClient httpClient = HttpClients.createDefault(); CloseableHttpClient httpClient = HttpClients.createDefault();
// String uri = "http://192.168.1.211/v1/datasets/b9b2322d-0bd9-4a7e-b6a7-14d7d8778bae/retrieve";
String uri = datasetPath +"/"+ datasetId + "/retrieve"; String uri = datasetPath +"/"+ datasetId + "/retrieve";
log.info("uri:" + uri); log.info("uri:" + uri);
//创建请求对象 //创建请求对象
@ -33,47 +32,62 @@ public class RetrievalUtil {
//组装requestbody //组装requestbody
//reranking_model //reranking_model
JSONObject reranking_model = new JSONObject(); JSONObject reranking_model = new JSONObject();
reranking_model.put("reranking_provider_name", ""); reranking_model.put("reranking_provider_name", "langgenius/huggingface_tei/huggingface_tei");
reranking_model.put("reranking_model_name", ""); reranking_model.put("reranking_model_name", "bge-reanker-v2-m3");
//retrieval_model //retrieval_model
JSONObject retrieval_model = new JSONObject(); JSONObject retrieval_model = new JSONObject();
retrieval_model.put("search_method", knowledgeBaseDto.getSearchMethod()); retrieval_model.put("search_method", knowledgeBaseDto.getSearchMethod());
retrieval_model.put("reranking_enable", false); retrieval_model.put("reranking_enable", true);
retrieval_model.put("reranking_mode", null);
retrieval_model.put("reranking_model", reranking_model); retrieval_model.put("reranking_model", reranking_model);
retrieval_model.put("weights", null); retrieval_model.put("weights", null);
retrieval_model.put("top_k", 1); retrieval_model.put("top_k", 10);
retrieval_model.put("score_threshold_enabled", false); retrieval_model.put("score_threshold_enabled", true);
retrieval_model.put("score_threshold", null); retrieval_model.put("score_threshold", 0.5);
//构造请求体 //构造请求体
JSONObject jsonObject = new JSONObject(); JSONObject jsonObject = new JSONObject();
jsonObject.put("query", knowledgeBaseDto.getQuery()); String query = knowledgeBaseDto.getQuery();
log.info("原始查询参数: {}", query);
jsonObject.put("query", query);
jsonObject.put("retrieval_model", retrieval_model); jsonObject.put("retrieval_model", retrieval_model);
StringEntity entity = new StringEntity(jsonObject.toString()); String requestBody = jsonObject.toString();
//指定请求编码方式 log.info("请求体内容: {}", requestBody);
entity.setContentEncoding("utf-8");
StringEntity entity = new StringEntity(requestBody, "UTF-8");
//指定数据格式 //指定数据格式
entity.setContentType("application/json"); entity.setContentType("application/json; charset=UTF-8");
httpPost.setEntity(entity); httpPost.setEntity(entity);
//发送请求 //发送请求
CloseableHttpResponse response = httpClient.execute(httpPost); CloseableHttpResponse response = httpClient.execute(httpPost);
//解析返回结果 //解析返回结果
int code = response.getStatusLine().getStatusCode(); int code = response.getStatusLine().getStatusCode();
System.out.println(code); System.out.println(code);
HttpEntity resultEntity = response.getEntity(); HttpEntity resultEntity = response.getEntity();
String stringEntity = EntityUtils.toString(resultEntity); String stringEntity = EntityUtils.toString(resultEntity);
System.out.println(stringEntity); // log.info("API返回的原始JSON数据: {}", stringEntity);
JSONObject jsonResult = JSON.parseObject(stringEntity); JSONObject jsonResult = JSON.parseObject(stringEntity);
List<RecordDto> recordDtoList = (List<RecordDto>) jsonResult.get("records"); //log.info("解析后的records字段: {}", jsonResult.getString("records"));
//关闭资源
response.close(); // 正确地将 JSON 数组转换为 RecordDto 列表
httpClient.close(); List<RecordDto> recordDtoList = JSON.parseArray(jsonResult.getJSONArray("records").toJSONString(), RecordDto.class);
return recordDtoList;
// 检查解析后的数据
if (recordDtoList != null && !recordDtoList.isEmpty()) {
RecordDto firstRecord = recordDtoList.get(0);
//log.info("第一条记录的segmentDto是否为null: {}", firstRecord.getSegmentDto() == null);
if (firstRecord.getSegmentDto() != null) {
//log.info("第一条记录的segmentDto内容: {}", firstRecord.getSegmentDto());
}
// log.info("第一条记录的完整内容: {}", firstRecord);
} }
//关闭资源
response.close();
httpClient.close();
return recordDtoList;
}
} }

View File

@ -41,6 +41,7 @@ bjtds:
pdfFilePath: ${pdf-file-path:D:\bjtds\pdfFile\} pdfFilePath: ${pdf-file-path:D:\bjtds\pdfFile\}
difyUploadPath: ${dify-upload-path:D:\bjtds\difyUploadFile\} difyUploadPath: ${dify-upload-path:D:\bjtds\difyUploadFile\}
pdfFileTempPath: ${pdf-file-temp-path:D:\bjtds\pdfTempFile\pdfFile\} pdfFileTempPath: ${pdf-file-temp-path:D:\bjtds\pdfTempFile\pdfFile\}
difyDocPreview: ${dify-doc-preview:http://192.168.1.211/dify-doc-preview}
dify: dify:
url: ${dify-url:http://192.168.1.211:16780} # 北京环境 Dify 服务地址 url: ${dify-url:http://192.168.1.211:16780} # 北京环境 Dify 服务地址

View File

@ -41,6 +41,8 @@ bjtds:
pdfFilePath: ${pdf-file-path:D:\bjtds\pdfFile\} pdfFilePath: ${pdf-file-path:D:\bjtds\pdfFile\}
difyUploadPath: ${dify-upload-path:D:\bjtds\difyUploadFile\} difyUploadPath: ${dify-upload-path:D:\bjtds\difyUploadFile\}
pdfFileTempPath: ${pdf-file-temp-path:D:\bjtds\pdfTempFile\pdfFile\} pdfFileTempPath: ${pdf-file-temp-path:D:\bjtds\pdfTempFile\pdfFile\}
difyDocPreview: ${dify-doc-preview:http://192.168.8.253/dify-doc-preview}
dify: dify:
url: ${dify-url:http://192.168.8.253:16780} # 武汉环境 Dify 服务地址 url: ${dify-url:http://192.168.8.253:16780} # 武汉环境 Dify 服务地址