feat: Add smart retrieval, keyword search across all knowledge bases, and document preview & download
This commit is contained in:
parent 5faa1aa59b
commit 93a8fd5dc1
@@ -148,4 +148,12 @@ export const deleteDataset = (id: string) => {
    url: '/brichat-service/datasetManage/document/deepAnalysisList',
    method: 'get'
  })
}
}

export function retrieval(data: { query: string; searchMethod: string }) {
  return request({
    url: `/brichat-service/knowledge-base/retrieval`,
    method: 'post',
    data
  })
}

@@ -17,7 +17,7 @@ module.exports = {
  // Content type the backend accepts: application/json;charset=UTF-8 or application/x-www-form-urlencoded;charset=UTF-8
  contentType: 'application/json;charset=UTF-8',
  // Maximum request time
- requestTimeout: 10000,
+ requestTimeout: 20000,
  // Success codes; String, Array and int types are supported
  successCode: [200, 0, '200', '0'],
  // Field name of the data status

File diff suppressed because it is too large
@@ -29,6 +29,8 @@ public class KnowledgeBaseController {
    @PostMapping("/retrieval")
    public ResultUtils retrieval(@RequestBody KnowledgeBaseDto knowledgeBaseDto) throws Exception {
        List<RecordDto> retrievalResult = knowledgeBaseService.retrieval(knowledgeBaseDto);

        return ResultUtils.success(retrievalResult);
    }
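
For orientation, a minimal sketch (not part of this commit) of the payload shape that KnowledgeBaseDto declares via @JsonProperty("search_method"), assuming Spring's default Jackson binding and Lombok-generated accessors; "hybrid_search" is an assumed Dify search-method value. Note that the frontend retrieval() helper above sends searchMethod in camelCase, so whether that key binds to this field depends on the Jackson configuration, which this diff does not show.

import com.bjtds.brichat.entity.dto.KnowledgeBaseDto;
import com.fasterxml.jackson.databind.ObjectMapper;

// Sketch only: demonstrates the declared payload shape, not code from this commit.
public class RetrievalRequestExample {
    public static void main(String[] args) throws Exception {
        String body = "{\"query\": \"contract approval workflow\", \"search_method\": \"hybrid_search\"}";
        KnowledgeBaseDto dto = new ObjectMapper().readValue(body, KnowledgeBaseDto.class);
        // @JsonProperty("search_method") lets Jackson fill the camelCase field from the snake_case key.
        System.out.println(dto.getQuery() + " / " + dto.getSearchMethod());
    }
}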
@@ -1,17 +1,17 @@
package com.bjtds.brichat.entity.dto;

-import com.fasterxml.jackson.annotation.JsonProperty;
+import com.alibaba.fastjson.annotation.JSONField;
import lombok.Data;

@Data
public class DocumentDto {

    private String id;
-   @JsonProperty("data_source_type")
+   @JSONField(name = "data_source_type")
    private String dataSourceType;
    private String name;
-   @JsonProperty("doc_type")
+   @JSONField(name = "doc_type")
    private String docType;
-   @JsonProperty("doc_metadata")
+   @JSONField(name = "doc_metadata")
    private String docMetadata;
}

@@ -7,7 +7,7 @@ import lombok.Data;
public class KnowledgeBaseDto {

    private String query;
    @JsonProperty("search_method")

    private String searchMethod;

}

@@ -1,15 +1,17 @@
package com.bjtds.brichat.entity.dto;

-import com.fasterxml.jackson.annotation.JsonProperty;
+import com.alibaba.fastjson.annotation.JSONField;
import lombok.Data;

@Data
public class RecordDto {

    /** Segment information */
    @JSONField(name = "segment")
    private SegmentDto segmentDto;
-   @JsonProperty("child_chunks")
+   @JSONField(name = "child_chunks")
    private String childChunks;
    /** Confidence score, e.g. 0.99 */
    private String score;
-   @JsonProperty("tsne_position")
+   @JSONField(name = "tsne_position")
    private String tsnePosition;
}
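
A minimal sketch (not part of this commit) of why the @JSONField annotations matter here: RetrievalUtil below parses the Dify response with fastjson's JSON.parseArray, which honors @JSONField but ignores Jackson's @JsonProperty. The sample record is a simplified, assumed shape; getters come from Lombok's @Data.

import com.alibaba.fastjson.JSON;
import com.bjtds.brichat.entity.dto.RecordDto;

import java.util.List;

// Sketch only: the JSON below is a simplified, assumed shape of a Dify retrieval record.
public class RecordDtoParseExample {
    public static void main(String[] args) {
        String records = "[{\"segment\": {\"id\": \"seg-1\", \"document_id\": \"doc-1\", \"content\": \"...\"},"
                + " \"score\": \"0.93\"}]";
        List<RecordDto> list = JSON.parseArray(records, RecordDto.class);
        // @JSONField(name = "segment") routes the nested object into segmentDto;
        // fastjson does not read Jackson's @JsonProperty, hence the annotation switch in these DTOs.
        System.out.println(list.get(0).getSegmentDto().getDocumentId() + " -> " + list.get(0).getScore());
    }
}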
@@ -1,6 +1,6 @@
package com.bjtds.brichat.entity.dto;

-import com.fasterxml.jackson.annotation.JsonProperty;
+import com.alibaba.fastjson.annotation.JSONField;
import lombok.Data;

import java.util.List;

@@ -10,39 +10,41 @@ public class SegmentDto {

    private String id;
    private Integer position;
-   @JsonProperty("document_id")
+   @JSONField(name = "document_id")
    private String documentId;
    private String content;
-   @JsonProperty("sign_content")
+   @JSONField(name = "sign_content")
    private String signContent;
    private String answer;
-   @JsonProperty("word_count")
+   @JSONField(name = "word_count")
    private Integer wordCount;
    private Integer tokens;
    private List<String> keywords;
-   @JsonProperty("index_node_id")
+   @JSONField(name = "index_node_id")
    private String indexNodeId;
-   @JsonProperty("index_node_hash")
+   @JSONField(name = "index_node_hash")
    private String indexNodeHash;
-   @JsonProperty("hit_count")
+   @JSONField(name = "hit_count")
    private Integer hitCount;
    private Boolean enabled;
-   @JsonProperty("disabled_at")
+   @JSONField(name = "disabled_at")
    private String disabledAt;
-   @JsonProperty("disabled_by")
+   @JSONField(name = "disabled_by")
    private String disabledBy;
    private String status;
-   @JsonProperty("created_at")
+   @JSONField(name = "created_at")
    private String createdAt;
-   @JsonProperty("created_by")
+   @JSONField(name = "created_by")
    private String createdBy;
-   @JsonProperty("indexing_at")
+   @JSONField(name = "indexing_at")
    private String indexingAt;
-   @JsonProperty("completed_at")
+   @JSONField(name = "completed_at")
    private String completedAt;
    private String error;
-   @JsonProperty("stopped_at")
+   @JSONField(name = "stopped_at")
    private String stoppedAt;

    @JSONField(name = "document")
    private DocumentDto documentDto;

}

@@ -7,6 +7,8 @@ import com.bjtds.brichat.entity.dify.*;
import com.bjtds.brichat.entity.dto.PdfConversionResponse;
import com.bjtds.brichat.entity.dto.PdfTaskDto;
import com.bjtds.brichat.mapper.postgresql.DifyDatasetsMapper;
import com.bjtds.brichat.mapper.postgresql.DifyUploadFileMapper;
import com.bjtds.brichat.service.DatasetsDocService;
import com.bjtds.brichat.service.dify.DifyDatasetApiService;
import com.bjtds.brichat.util.Constants;
import com.fasterxml.jackson.core.JsonProcessingException;

@@ -86,9 +88,18 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
    @Value("${pdf.conversion.service.max-workers}")
    private Integer pdfConversionMaxWorkers;

    @Value("${bjtds.difyDocPreview}")
    private String difyDocPreview;

    @Resource
    private DifyDatasetsMapper difyDatasetsMapper;

    @Resource
    private DifyUploadFileMapper difyUploadFileMapper;

    @Resource
    private DatasetsDocService datasetsDocService;


    @Override
    public ResponseEntity<DifyMetadata> addMetadata(String datasetId, DifyMetadata metadata) {

@@ -423,10 +434,15 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
            Map<String, String> document = (Map<String, String>) exchange.getBody().get("document");
            String documentId = document.get("id");

-           UploadFileInfoResponse uploadFileInfoResponse = difyDatasetService.uploadFileInfo(request.getDatasetId(), documentId);
-           String urls = uploadFileInfoResponse.getUrl();
-           String fullUrl = difyUrl + urls;
-           setSourceUrlMatedata(request.getDatasetId(), documentId, fullUrl);
+           String uploadId = datasetsDocService.getUploadById(documentId);
+           DifyUploadFile uploadInfo = difyUploadFileMapper.getFileById(uploadId);
+
+           int lastIndex = uploadInfo.getKey().lastIndexOf('/');
+           String fileName = uploadInfo.getKey().substring(lastIndex + 1);
+           String sourceUrl = difyDocPreview + "/" + fileName;
+
+           setSourceUrlMatedata(request.getDatasetId(), documentId, sourceUrl);

            return exchange;
        } catch (Exception e) {
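
A small worked example (editor's sketch, with a made-up upload key) of the preview-URL construction above: the file name is the last path segment of the DifyUploadFile key, appended to the configured bjtds.difyDocPreview base.

// Sketch only: the key value is hypothetical; the base URL matches the yml default later in this diff.
public class PreviewUrlExample {
    public static void main(String[] args) {
        String difyDocPreview = "http://192.168.1.211/dify-doc-preview"; // bjtds.difyDocPreview
        String key = "upload_files/2025/0a1b2c3d.pdf";                    // hypothetical DifyUploadFile.key
        int lastIndex = key.lastIndexOf('/');
        String fileName = key.substring(lastIndex + 1);                   // "0a1b2c3d.pdf"
        String sourceUrl = difyDocPreview + "/" + fileName;
        System.out.println(sourceUrl); // http://192.168.1.211/dify-doc-preview/0a1b2c3d.pdf
    }
}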
@@ -1,7 +1,7 @@
// src/main/java/com/bjtds/brichat/service/impl/KnowledgeBaseServiceImpl.java

package com.bjtds.brichat.service.impl;

import com.bjtds.brichat.entity.dataset.AppDataset;

import com.bjtds.brichat.entity.dataset.TUserDataset;
import com.bjtds.brichat.entity.dataset.WorkflowDatasetDto;
import com.bjtds.brichat.entity.dify.Workflow;

@@ -22,10 +22,12 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.UUID;
+import java.util.*;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.stream.Collectors;
+import javax.annotation.PreDestroy;

@Slf4j
@Service

@@ -53,23 +55,67 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {
    private final ObjectMapper objectMapper = new ObjectMapper();

    // Thread pool used for the parallel queries
    private final ExecutorService executorService = Executors.newFixedThreadPool(10);

    @Override
    public List<RecordDto> retrieval(KnowledgeBaseDto knowledgeBaseDto) throws Exception {
        String datasetPath = difyUrl + Constants.DATABASE_API;

        List<String> datasetIds = difyDatasetsMapper.getDatasetIds();
-       List<RecordDto> recordDtos = Lists.newArrayList();
        log.info("datasetPath:{}", datasetPath);
        log.info("apiKey:{}", apiKey);
        log.info("Starting parallel query of {} datasets", datasetIds.size());

-       // Synchronous query; this needs to be asynchronous
-       for (String datasetId : datasetIds) {
-           List<RecordDto> recordDtoList = RetrievalUtil.getRetrieval(datasetPath, apiKey, datasetId, knowledgeBaseDto);
-           if (recordDtoList != null && !recordDtoList.isEmpty()) {
-               // Null/empty check
-               recordDtos.addAll(recordDtoList);
+       // Query the datasets in parallel with CompletableFuture
+       List<CompletableFuture<List<RecordDto>>> futures = datasetIds.stream()
+           .map(datasetId -> CompletableFuture.<List<RecordDto>>supplyAsync(() -> {
+               try {
+                   log.debug("Start querying dataset: {}", datasetId);
+                   List<RecordDto> result = RetrievalUtil.getRetrieval(datasetPath, apiKey, datasetId, knowledgeBaseDto);
+                   log.debug("Finished querying dataset: {}, result count: {}", datasetId, result != null ? result.size() : 0);
+                   return result;
+               } catch (Exception e) {
+                   log.error("Error while querying dataset {}", datasetId, e);
+                   return new ArrayList<RecordDto>(); // Return an empty list instead of rethrowing
+               }
+           }, executorService))
+           .collect(Collectors.toList());
+
+       // Wait for all queries to finish and merge the results
+       List<RecordDto> recordDtos = Lists.newArrayList();
+       try {
+           CompletableFuture<Void> allFutures = CompletableFuture.allOf(
+               futures.toArray(new CompletableFuture[0]));
+
+           allFutures.get(); // Wait for all tasks to complete
+
+           // Collect all results
+           for (CompletableFuture<List<RecordDto>> future : futures) {
+               List<RecordDto> recordDtoList = future.get();
+               if (recordDtoList != null && !recordDtoList.isEmpty()) {
+                   recordDtos.addAll(recordDtoList);
+               }
+           }
+
+           log.info("Parallel query finished, {} records retrieved in total", recordDtos.size());
+       } catch (Exception e) {
+           log.error("Error during parallel query", e);
+           throw new Exception("Parallel query failed: " + e.getMessage());
+       }

        log.info("Sorting results by confidence score");
        // Assumes RecordDto exposes a getScore() method
        recordDtos.sort((dto1, dto2) -> {
            try {
                double score1 = Double.parseDouble(dto1.getScore());
                double score2 = Double.parseDouble(dto2.getScore());
                return Double.compare(score2, score1);
            } catch (NumberFormatException e) {
                return 0;
            }
        });
        return recordDtos;
    }
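
A brief sketch (not part of this commit) showing the effect of the comparator above: records are ordered by score in descending order, and an unparsable score leaves the relative order unchanged. Setters and getters assume Lombok's @Data; the score values are hypothetical.

import com.bjtds.brichat.entity.dto.RecordDto;

import java.util.ArrayList;
import java.util.List;

// Sketch only: hypothetical score values, same comparator as in retrieval() above.
public class ScoreSortExample {
    public static void main(String[] args) {
        List<RecordDto> records = new ArrayList<>();
        records.add(withScore("0.42"));
        records.add(withScore("0.91"));
        records.add(withScore("0.17"));
        records.sort((dto1, dto2) -> {
            try {
                return Double.compare(Double.parseDouble(dto2.getScore()), Double.parseDouble(dto1.getScore()));
            } catch (NumberFormatException e) {
                return 0; // unparsable scores keep their relative position
            }
        });
        records.forEach(r -> System.out.println(r.getScore())); // 0.91, 0.42, 0.17
    }

    private static RecordDto withScore(String score) {
        RecordDto dto = new RecordDto();
        dto.setScore(score);
        return dto;
    }
}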

@@ -326,4 +372,26 @@ public class KnowledgeBaseServiceImpl {
    public List<TUserDataset> getAllUserDatasets() throws Exception {
        return tUserDatasetMapper.getAllUserDatasets();
    }

    /**
     * Shut down the thread pool and release its resources when the bean is destroyed
     */
    @PreDestroy
    public void destroy() {
        if (executorService != null && !executorService.isShutdown()) {
            log.info("Shutting down the knowledge-base query thread pool...");
            executorService.shutdown();
            try {
                if (!executorService.awaitTermination(60, java.util.concurrent.TimeUnit.SECONDS)) {
                    executorService.shutdownNow();
                    log.warn("Thread pool did not shut down within 60 seconds; forcing shutdown");
                }
            } catch (InterruptedException e) {
                executorService.shutdownNow();
                Thread.currentThread().interrupt();
                log.error("Interrupted while waiting for the thread pool to shut down", e);
            }
            log.info("Knowledge-base query thread pool has been shut down");
        }
    }
}

@@ -18,11 +18,10 @@ import java.util.List;
public class RetrievalUtil {

    public static List<RecordDto> getRetrieval(String datasetPath, String apiKey, String datasetId, KnowledgeBaseDto knowledgeBaseDto) throws Exception {
        // Create the HttpClient
        CloseableHttpClient httpClient = HttpClients.createDefault();

        // String uri = "http://192.168.1.211/v1/datasets/b9b2322d-0bd9-4a7e-b6a7-14d7d8778bae/retrieve";
        String uri = datasetPath + "/" + datasetId + "/retrieve";
        log.info("uri:" + uri);
        // Create the request object

@@ -33,47 +32,62 @@
        // Assemble the request body
        // reranking_model
        JSONObject reranking_model = new JSONObject();
-       reranking_model.put("reranking_provider_name", "");
-       reranking_model.put("reranking_model_name", "");
+       reranking_model.put("reranking_provider_name", "langgenius/huggingface_tei/huggingface_tei");
+       reranking_model.put("reranking_model_name", "bge-reanker-v2-m3");
        // retrieval_model
        JSONObject retrieval_model = new JSONObject();
        retrieval_model.put("search_method", knowledgeBaseDto.getSearchMethod());
-       retrieval_model.put("reranking_enable", false);
-       retrieval_model.put("reranking_mode", null);
+       retrieval_model.put("reranking_enable", true);
        retrieval_model.put("reranking_model", reranking_model);
        retrieval_model.put("weights", null);
-       retrieval_model.put("top_k", 1);
-       retrieval_model.put("score_threshold_enabled", false);
-       retrieval_model.put("score_threshold", null);
+       retrieval_model.put("top_k", 10);
+       retrieval_model.put("score_threshold_enabled", true);
+       retrieval_model.put("score_threshold", 0.5);

        // Build the request body
        JSONObject jsonObject = new JSONObject();
-       jsonObject.put("query", knowledgeBaseDto.getQuery());
+       String query = knowledgeBaseDto.getQuery();
+       log.info("Original query parameter: {}", query);
+       jsonObject.put("query", query);
        jsonObject.put("retrieval_model", retrieval_model);

-       StringEntity entity = new StringEntity(jsonObject.toString());
-       // Set the request encoding
-       entity.setContentEncoding("utf-8");
+       String requestBody = jsonObject.toString();
+       log.info("Request body: {}", requestBody);
+
+       StringEntity entity = new StringEntity(requestBody, "UTF-8");
        // Set the content type
-       entity.setContentType("application/json");
+       entity.setContentType("application/json; charset=UTF-8");
        httpPost.setEntity(entity);

        // Send the request
        CloseableHttpResponse response = httpClient.execute(httpPost);

        // Parse the response
        int code = response.getStatusLine().getStatusCode();
        System.out.println(code);

        HttpEntity resultEntity = response.getEntity();
        String stringEntity = EntityUtils.toString(resultEntity);
-       System.out.println(stringEntity);
+       // log.info("Raw JSON returned by the API: {}", stringEntity);

        JSONObject jsonResult = JSON.parseObject(stringEntity);
-       List<RecordDto> recordDtoList = (List<RecordDto>) jsonResult.get("records");
+       // log.info("Parsed records field: {}", jsonResult.getString("records"));
+
+       // Convert the JSON array into a list of RecordDto correctly
+       List<RecordDto> recordDtoList = JSON.parseArray(jsonResult.getJSONArray("records").toJSONString(), RecordDto.class);
+
+       // Inspect the parsed data
+       if (recordDtoList != null && !recordDtoList.isEmpty()) {
+           RecordDto firstRecord = recordDtoList.get(0);
+           // log.info("Is segmentDto of the first record null: {}", firstRecord.getSegmentDto() == null);
+           if (firstRecord.getSegmentDto() != null) {
+               // log.info("segmentDto of the first record: {}", firstRecord.getSegmentDto());
+           }
+           // log.info("Full content of the first record: {}", firstRecord);
+       }
        // Close resources
        response.close();
        httpClient.close();
        return recordDtoList;
    }
}
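
For completeness, a minimal sketch (not part of this commit) of calling getRetrieval directly. The dataset path and dataset id come from the commented-out example URI above; the API key is a placeholder, the RetrievalUtil package is assumed, "hybrid_search" is an assumed Dify search-method value, and the DTO setters assume Lombok's @Data.

import com.bjtds.brichat.entity.dto.KnowledgeBaseDto;
import com.bjtds.brichat.entity.dto.RecordDto;
import com.bjtds.brichat.util.RetrievalUtil; // package assumed

import java.util.List;

// Sketch only: every literal below is a placeholder, not configuration taken from this commit.
public class RetrievalUtilExample {
    public static void main(String[] args) throws Exception {
        String datasetPath = "http://192.168.1.211/v1/datasets";       // difyUrl + Constants.DATABASE_API (assumed)
        String apiKey = "dataset-xxxxxxxxxxxxxxxx";                     // placeholder Dify dataset API key
        String datasetId = "b9b2322d-0bd9-4a7e-b6a7-14d7d8778bae";      // from the commented example URI

        KnowledgeBaseDto dto = new KnowledgeBaseDto();
        dto.setQuery("document preview");
        dto.setSearchMethod("hybrid_search");

        List<RecordDto> records = RetrievalUtil.getRetrieval(datasetPath, apiKey, datasetId, dto);
        records.forEach(r -> System.out.println(r.getScore()));
    }
}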
@@ -41,6 +41,7 @@ bjtds:
  pdfFilePath: ${pdf-file-path:D:\bjtds\pdfFile\}
  difyUploadPath: ${dify-upload-path:D:\bjtds\difyUploadFile\}
  pdfFileTempPath: ${pdf-file-temp-path:D:\bjtds\pdfTempFile\pdfFile\}
  difyDocPreview: ${dify-doc-preview:http://192.168.1.211/dify-doc-preview}

dify:
  url: ${dify-url:http://192.168.1.211:16780} # Dify service address for the Beijing environment

@@ -41,6 +41,8 @@ bjtds:
  pdfFilePath: ${pdf-file-path:D:\bjtds\pdfFile\}
  difyUploadPath: ${dify-upload-path:D:\bjtds\difyUploadFile\}
  pdfFileTempPath: ${pdf-file-temp-path:D:\bjtds\pdfTempFile\pdfFile\}
  difyDocPreview: ${dify-doc-preview:http://192.168.8.253/dify-doc-preview}


dify:
  url: ${dify-url:http://192.168.8.253:16780} # Dify service address for the Wuhan environment