Merge remote-tracking branch 'origin/main'
# Conflicts: # chat-client/src/views/chatweb/components/ChatBox.vue # chat-client/src/views/datasets/components/DocumentList.vue
This commit is contained in:
commit
4cec0201ff
|
@ -2,17 +2,17 @@
|
|||
# 此文件修改后需要重启项目
|
||||
NODE_ENV=development
|
||||
#VUE_APP_BASE_URL='/vab-mock-server'
|
||||
VUE_APP_BASE_URL='http://localhost:10001'
|
||||
VUE_APP_API_BASE_URL='http://localhost:8080'
|
||||
# VUE_APP_BASE_URL='http://localhost:10001'
|
||||
# VUE_APP_API_BASE_URL='http://localhost:8080'
|
||||
#北京服务器配置
|
||||
# VUE_APP_BASE_URL='http://192.168.1.211:80/brichat'
|
||||
# VUE_APP_API_BASE_URL='http://192.168.1.211:80/brichat'
|
||||
# #武汉公司服务器
|
||||
# VUE_APP_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
# VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
VUE_APP_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
#总部服务器配置
|
||||
# VUE_APP_BASE_URL='http://192.168.0.33:80/brichat'
|
||||
# VUE_APP_API_BASE_URL='http://192.168.0.33:80/brichat'
|
||||
|
||||
VUE_APP_GITHUB_USER_NAME=test
|
||||
VUE_APP_SECRET_KEY=preview
|
||||
# VUE_APP_GITHUB_USER_NAME=test
|
||||
# VUE_APP_SECRET_KEY=preview
|
||||
|
|
|
@ -2,18 +2,18 @@
|
|||
# 此文件修改后需要重启项目
|
||||
NODE_ENV=production
|
||||
# VUE_APP_BASE_URL='/vab-mock-server'
|
||||
VUE_APP_BASE_URL='http://localhost:10001'
|
||||
VUE_APP_API_BASE_URL='http://localhost:10001'
|
||||
# VUE_APP_BASE_URL='http://localhost:10001'
|
||||
# VUE_APP_API_BASE_URL='http://localhost:10001'
|
||||
#北京服务器
|
||||
# VUE_APP_BASE_URL='http://192.168.1.211:80/brichat'
|
||||
# VUE_APP_API_BASE_URL='http://192.168.1.211:80/brichat'
|
||||
#武汉公司服务器
|
||||
# VUE_APP_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
# VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
VUE_APP_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
# 总部服务器
|
||||
# VUE_APP_BASE_URL='http://192.168.0.33:80/brichat'
|
||||
# VUE_APP_API_BASE_URL='http://192.168.0.33:80/brichat'
|
||||
|
||||
|
||||
VUE_APP_GITHUB_USER_NAME=test
|
||||
VUE_APP_SECRET_KEY=preview
|
||||
# VUE_APP_GITHUB_USER_NAME=test
|
||||
# VUE_APP_SECRET_KEY=preview
|
|
@ -138,4 +138,14 @@ export const deleteDataset = (id: string) => {
|
|||
datasetId
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Fetch the list of deep-analysis tasks.
 *
 * Sends a GET request to the `deepAnalysisList` document endpoint and
 * returns whatever the shared `request` helper returns.
 */
export const getDeepAnalysisList = () =>
  request({
    method: 'get',
    url: '/brichat-service/datasetManage/document/deepAnalysisList'
  })
|
|
@ -295,6 +295,8 @@ export default {
|
|||
download: 'Download',
|
||||
rename: 'Rename',
|
||||
delete: 'Delete',
|
||||
deepAnalysis: 'Deep Analysis',
|
||||
taskMonitor: 'Task Monitor',
|
||||
},
|
||||
search: {
|
||||
placeholder: 'Enter keywords to search',
|
||||
|
@ -314,6 +316,8 @@ export default {
|
|||
segmentation: 'Segmentation Rules',
|
||||
separatorPlaceholder: 'Separator (default: ###)',
|
||||
maxTokens: 'Max Tokens',
|
||||
deepAnalysis: 'Deep Analysis',
|
||||
deepAnalysisOption: 'Enable deep analysis to improve document understanding accuracy',
|
||||
selectFile: 'Select the file',
|
||||
fileTip:
|
||||
'Multiple selection supported, max 100MB per file. Accepted formats: TXT, MD, MARKDOWN, MDX, PDF, HTML, HTM, XLSX, XLS, DOCX, CSV, VTT, PROPERTIES',
|
||||
|
@ -331,6 +335,46 @@ export default {
|
|||
confirm: 'Confirm',
|
||||
deleteconfirm: 'Delete Confirm',
|
||||
},
|
||||
deepAnalysisDialog: {
|
||||
title: 'Deep Analysis Task List',
|
||||
noTasks: 'No deep analysis tasks are currently in progress',
|
||||
datasetName: 'Dataset',
|
||||
taskId: 'Task ID',
|
||||
createTime: 'Create Time',
|
||||
close: 'Close',
|
||||
totalTasks: 'Total Tasks',
|
||||
runningTasks: 'Running',
|
||||
completedTasks: 'Completed',
|
||||
completed: 'Completed',
|
||||
running: 'Running',
|
||||
pending: 'Pending',
|
||||
},
|
||||
progressPanel: {
|
||||
title: 'Deep Analysis Progress',
|
||||
show: 'Show Details',
|
||||
hide: 'Hide Details',
|
||||
},
|
||||
tooltips: {
|
||||
hasRunningTasks: '{count} tasks in progress',
|
||||
viewDeepAnalysis: 'View deep analysis tasks',
|
||||
showSidebar: 'Show task monitor panel',
|
||||
hideSidebar: 'Hide task monitor panel',
|
||||
},
|
||||
sidebar: {
|
||||
title: 'Task Monitor',
|
||||
},
|
||||
timeAgo: {
|
||||
justNow: 'Just now',
|
||||
minutes: '{count} minutes ago',
|
||||
hours: '{count} hours ago',
|
||||
days: '{count} days ago',
|
||||
},
|
||||
notifications: {
|
||||
allTasksCompleted: 'Tasks Completed',
|
||||
allTasksCompletedMessage: 'All deep analysis tasks have been completed!',
|
||||
taskCompleted: 'Task Completed',
|
||||
taskCompletedMessage: 'Deep analysis task "{name}" has been completed',
|
||||
},
|
||||
messages: {
|
||||
FilenamecantEmpty: 'Filename cannot be empty',
|
||||
uploadSuccess: 'Upload successfully',
|
||||
|
@ -359,6 +403,7 @@ export default {
|
|||
fetchFailed: 'Failed to get document list: ',
|
||||
previewFailed: 'Preview failed: ',
|
||||
downloadFailed: 'Download failed: ',
|
||||
fetchDeepAnalysisFailed: 'Failed to get deep analysis task list',
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -515,63 +560,35 @@ export default {
|
|||
Tip: 'Tip',
|
||||
selectDeleteData: 'Please select the data to be deleted',
|
||||
confirmDeleteApiKeys: 'This operation will permanently delete selected ',
|
||||
confirmDeleteApiKeysEnd: ' API key, are you sure?',
|
||||
confirmDeleteApiKeysEndPlural: ' API keys, are you sure?',
|
||||
confirmDeleteApiKeysEnd:' API key, are you sure?',
|
||||
confirmDeleteApiKeysEndPlural:' API keys, are you sure?',
|
||||
confirmRefreshCache: 'This operation will refresh Redis cache, reloading all API keys, are you sure?',
|
||||
updateSuccess: 'Update Successfully',
|
||||
updateFail: 'Update Failed',
|
||||
addSuccess: 'Add Successfully',
|
||||
addFail: 'Add Failed',
|
||||
addFail:'Add Failed'
|
||||
},
|
||||
prologue: {
|
||||
title: 'Opening Statement Management',
|
||||
query: 'Query',
|
||||
save: 'Save',
|
||||
prologue:{
|
||||
chatType: 'Chat Type',
|
||||
chatTypes: {
|
||||
'1': 'Fault Diagnosis',
|
||||
'2': 'Intelligent Q&A',
|
||||
'3': 'Chart Report',
|
||||
'4': 'Emergency Assistant Q&A',
|
||||
'5': 'Diagnostic Code Lookup',
|
||||
},
|
||||
openingContent: 'Opening Statement Content',
|
||||
addRecommend: 'Add Recommended Question',
|
||||
recommendation: 'Recommended Question',
|
||||
action: 'Action',
|
||||
selectChatType: 'Select Chat Type',
|
||||
prologueContent: 'Prologue Content',
|
||||
addRecommend: 'Add Recommendation',
|
||||
recommendQuestion: 'Recommended Question',
|
||||
actions: 'Actions',
|
||||
edit: 'Edit',
|
||||
delete: 'Delete',
|
||||
dialog: {
|
||||
addTitle: 'Add Recommended Question',
|
||||
editTitle: 'Edit Recommended Question',
|
||||
questionContent: 'Recommended Question',
|
||||
cancel: 'Cancel',
|
||||
confirm: 'Confirm',
|
||||
save: 'Save',
|
||||
},
|
||||
message: {
|
||||
loadSuccess: 'Loaded successfully',
|
||||
loadFailed: 'Load failed, please try again later',
|
||||
loadFailedMess: 'Failed to load recommended questions',
|
||||
notFound: 'Opening statement not found',
|
||||
saveSuccess: 'Saved successfully',
|
||||
saveFailed: 'Save failed, please try again later',
|
||||
addSuccess: 'Recommended question added successfully',
|
||||
addFailed: 'Failed to add recommended question',
|
||||
editSuccess: 'Recommended question edited successfully',
|
||||
editFailed: 'Failed to edit recommended question, please try again later',
|
||||
deleteSuccess: 'Deleted successfully',
|
||||
deleteFailed: 'Deletion failed',
|
||||
},
|
||||
confirm: {
|
||||
save: 'Are you sure you want to save the changes?',
|
||||
tips: 'Tips',
|
||||
delete: 'Are you sure you want to delete this recommended question?',
|
||||
deleteConfirm: 'Delete Confirmation',
|
||||
},
|
||||
placeholder: {
|
||||
selectType: 'Select chat type',
|
||||
},
|
||||
confirmDelete: 'Are you sure you want to delete this recommendation?',
|
||||
confirmSave: 'Are you sure you want to save the changes?',
|
||||
deleteSuccess: 'Deleted successfully',
|
||||
deleteFailure: 'Failed to delete, please try again later',
|
||||
saveSuccess: 'Saved successfully',
|
||||
saveFailure: 'Failed to save, please try again later',
|
||||
loadSuccess: 'Loaded successfully',
|
||||
loadFailure: 'Failed to load, please try again later',
|
||||
noContent: 'No content found',
|
||||
loading: 'Loading...',
|
||||
updateRecommendation: 'Update Recommendation',
|
||||
addRecommendation: 'Add Recommendation',
|
||||
},
|
||||
vabtabs: {
|
||||
refresh: 'Refresh',
|
||||
|
|
|
@ -293,6 +293,8 @@ export default {
|
|||
download: '下載',
|
||||
rename: '重命名',
|
||||
delete: '刪除',
|
||||
deepAnalysis: '深度解析',
|
||||
taskMonitor: '任務監控',
|
||||
},
|
||||
search: {
|
||||
placeholder: '請輸入關鍵詞查詢',
|
||||
|
@ -328,6 +330,46 @@ export default {
|
|||
confirm: '確定',
|
||||
deleteconfirm: '刪除確認',
|
||||
},
|
||||
deepAnalysisDialog: {
|
||||
title: '深度解析任務列表',
|
||||
noTasks: '當前沒有正在進行的深度解析任務',
|
||||
datasetName: '知識庫',
|
||||
taskId: '任務ID',
|
||||
createTime: '創建時間',
|
||||
close: '關閉',
|
||||
totalTasks: '總任務數',
|
||||
runningTasks: '進行中',
|
||||
completedTasks: '已完成',
|
||||
completed: '已完成',
|
||||
running: '進行中',
|
||||
pending: '等待中',
|
||||
},
|
||||
progressPanel: {
|
||||
title: '深度解析進度',
|
||||
show: '展開詳情',
|
||||
hide: '收起詳情',
|
||||
},
|
||||
tooltips: {
|
||||
hasRunningTasks: '有 {count} 個任務正在進行',
|
||||
viewDeepAnalysis: '查看深度解析任務',
|
||||
showSidebar: '顯示任務監控面板',
|
||||
hideSidebar: '隱藏任務監控面板',
|
||||
},
|
||||
sidebar: {
|
||||
title: '任務監控',
|
||||
},
|
||||
timeAgo: {
|
||||
justNow: '剛剛',
|
||||
minutes: '{count} 分鐘前',
|
||||
hours: '{count} 小時前',
|
||||
days: '{count} 天前',
|
||||
},
|
||||
notifications: {
|
||||
allTasksCompleted: '任務完成',
|
||||
allTasksCompletedMessage: '所有深度解析任務已完成!',
|
||||
taskCompleted: '任務完成',
|
||||
taskCompletedMessage: '深度解析任務 "{name}" 已完成',
|
||||
},
|
||||
messages: {
|
||||
FilenamecantEmpty: '文件名不能为空',
|
||||
uploadSuccess: '上傳成功',
|
||||
|
|
|
@ -303,6 +303,8 @@ export default {
|
|||
download: '下载',
|
||||
rename: '重命名',
|
||||
delete: '删除',
|
||||
deepAnalysis: '深度解析',
|
||||
taskMonitor: '任务监控',
|
||||
},
|
||||
search: {
|
||||
placeholder: '请输入关键词查询',
|
||||
|
@ -322,6 +324,8 @@ export default {
|
|||
segmentation: '分段规则',
|
||||
separatorPlaceholder: '分隔符(默认###)',
|
||||
maxTokens: '最大Token数',
|
||||
deepAnalysis: '深度解析',
|
||||
deepAnalysisOption: '启用深度解析,提高文档理解精度',
|
||||
selectFile: '选择文件',
|
||||
fileTip: '支持多选,单个文件不超过100MB,可接受格式:TXT、MD、MARKDOWN、MDX、PDF、HTML、HTM、XLSX、XLS、DOCX、CSV、VTT、PROPERTIES',
|
||||
cancel: '取消',
|
||||
|
@ -338,6 +342,46 @@ export default {
|
|||
confirm: '确定',
|
||||
deleteconfirm: '删除确认',
|
||||
},
|
||||
deepAnalysisDialog: {
|
||||
title: '深度解析任务列表',
|
||||
noTasks: '当前没有正在进行的深度解析任务',
|
||||
datasetName: '知识库',
|
||||
taskId: '任务ID',
|
||||
createTime: '创建时间',
|
||||
close: '关闭',
|
||||
totalTasks: '总任务数',
|
||||
runningTasks: '进行中',
|
||||
completedTasks: '已完成',
|
||||
completed: '已完成',
|
||||
running: '进行中',
|
||||
pending: '等待中',
|
||||
},
|
||||
progressPanel: {
|
||||
title: '深度解析进度',
|
||||
show: '展开详情',
|
||||
hide: '收起详情',
|
||||
},
|
||||
tooltips: {
|
||||
hasRunningTasks: '有 {count} 个任务正在进行',
|
||||
viewDeepAnalysis: '查看深度解析任务',
|
||||
showSidebar: '显示任务监控面板',
|
||||
hideSidebar: '隐藏任务监控面板',
|
||||
},
|
||||
sidebar: {
|
||||
title: '任务监控',
|
||||
},
|
||||
timeAgo: {
|
||||
justNow: '刚刚',
|
||||
minutes: '{count} 分钟前',
|
||||
hours: '{count} 小时前',
|
||||
days: '{count} 天前',
|
||||
},
|
||||
notifications: {
|
||||
allTasksCompleted: '任务完成',
|
||||
allTasksCompletedMessage: '所有深度解析任务已完成!',
|
||||
taskCompleted: '任务完成',
|
||||
taskCompletedMessage: '深度解析任务 "{name}" 已完成',
|
||||
},
|
||||
messages: {
|
||||
FilenamecantEmpty: '文件名不能为空',
|
||||
uploadSuccess: '上传成功',
|
||||
|
@ -367,6 +411,7 @@ export default {
|
|||
fetchFailed: '获取文档列表失败: ',
|
||||
previewFailed: '预览失败: ',
|
||||
downloadFailed: '下载失败: ',
|
||||
fetchDeepAnalysisFailed: '获取深度解析任务列表失败',
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -3,9 +3,11 @@ package com.bjtds;
|
|||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.cloud.openfeign.EnableFeignClients;
|
||||
import org.springframework.scheduling.annotation.EnableScheduling;
|
||||
|
||||
@SpringBootApplication
|
||||
@EnableFeignClients
|
||||
@EnableScheduling
|
||||
public class BriChatServiceApplication {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
package com.bjtds.brichat.config;
|
||||
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.scheduling.TaskScheduler;
|
||||
import org.springframework.scheduling.concurrent.ThreadPoolTaskScheduler;
|
||||
|
||||
/**
|
||||
* 定时任务配置类
|
||||
*/
|
||||
@Configuration
|
||||
public class ScheduleConfig {
|
||||
|
||||
/**
|
||||
* 配置定时任务线程池
|
||||
*/
|
||||
@Bean
|
||||
public TaskScheduler taskScheduler() {
|
||||
ThreadPoolTaskScheduler scheduler = new ThreadPoolTaskScheduler();
|
||||
scheduler.setPoolSize(5); // 设置线程池大小
|
||||
scheduler.setThreadNamePrefix("pdf-task-scheduler-");
|
||||
scheduler.setAwaitTerminationSeconds(60);
|
||||
scheduler.setWaitForTasksToCompleteOnShutdown(true);
|
||||
scheduler.initialize();
|
||||
return scheduler;
|
||||
}
|
||||
}
|
|
@ -2,6 +2,7 @@ package com.bjtds.brichat.controller;
|
|||
import cn.hutool.core.io.resource.InputStreamResource;
|
||||
import com.bjtds.brichat.entity.dataset.DatasetsDocRenameReq;
|
||||
import com.bjtds.brichat.entity.dataset.DocumentUploadReq;
|
||||
import com.bjtds.brichat.entity.dto.PdfTaskDto;
|
||||
import com.bjtds.brichat.service.DatasetsDocService;
|
||||
import com.bjtds.brichat.service.dify.DifyDatasetApiService;
|
||||
import com.bjtds.brichat.util.Constants;
|
||||
|
@ -16,8 +17,10 @@ import io.github.guoshiqiufeng.dify.dataset.dto.response.DocumentInfo;
|
|||
import io.github.guoshiqiufeng.dify.dataset.dto.response.UploadFileInfoResponse;
|
||||
import io.swagger.annotations.Api;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.core.io.ByteArrayResource;
|
||||
import org.springframework.data.redis.core.RedisTemplate;
|
||||
import org.springframework.http.*;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
@ -25,6 +28,9 @@ import org.springframework.web.multipart.MultipartFile;
|
|||
import javax.annotation.Resource;
|
||||
import java.net.URLEncoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpStatus;
|
||||
|
@ -59,7 +65,8 @@ public class DatasetDocController {
|
|||
|
||||
@Value("${dify.url}")
|
||||
private String difyUrl;
|
||||
|
||||
@Autowired
|
||||
private RedisTemplate<String, Object> redisTemplate;
|
||||
|
||||
|
||||
@PostMapping("/page")
|
||||
|
@ -168,6 +175,63 @@ public class DatasetDocController {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* 获取深度解析任务列表(正在pdf-> md的列表)
|
||||
* 返回:PdfTaskDto列表
|
||||
*
|
||||
* @return ResultUtils 包含PdfTaskDto列表的响应
|
||||
*/
|
||||
@GetMapping("/deepAnalysisList")
|
||||
public ResultUtils getDeepAnalysisList(){
|
||||
List<PdfTaskDto> taskList = new ArrayList<>();
|
||||
|
||||
try {
|
||||
// 1. 获取所有任务ID列表
|
||||
List<Object> taskIds = redisTemplate.opsForList().range(Constants.PDF_TASK_REDIS_KEY + ":list", 0, -1);
|
||||
|
||||
if (taskIds == null || taskIds.isEmpty()) {
|
||||
log.info("当前没有正在处理的PDF转换任务");
|
||||
return ResultUtils.success(taskList);
|
||||
}
|
||||
|
||||
log.info("获取到{}个PDF转换任务", taskIds.size());
|
||||
|
||||
// 2. 遍历任务ID,获取具体任务信息
|
||||
for (Object taskIdObj : taskIds) {
|
||||
if (taskIdObj == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String taskId = taskIdObj.toString();
|
||||
try {
|
||||
// 3. 根据任务ID从Redis获取任务详细信息
|
||||
String hashKey = Constants.PDF_TASK_REDIS_KEY + ":" + taskId;
|
||||
Object taskInfoObj = redisTemplate.opsForHash().get(hashKey, "taskInfo");
|
||||
|
||||
if (taskInfoObj instanceof PdfTaskDto) {
|
||||
PdfTaskDto taskInfo = (PdfTaskDto) taskInfoObj;
|
||||
taskList.add(taskInfo);
|
||||
log.debug("获取任务信息成功: taskId={}, name={}, percent={}",
|
||||
taskId, taskInfo.getName(), taskInfo.getPercent());
|
||||
} else {
|
||||
log.warn("任务{}的信息格式不正确或不存在", taskId);
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("获取任务{}的详细信息失败: {}", taskId, e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
log.info("成功获取{}个有效的PDF转换任务信息", taskList.size());
|
||||
return ResultUtils.success(taskList);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("获取深度解析任务列表失败: {}", e.getMessage(), e);
|
||||
return ResultUtils.error("获取深度解析任务列表失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -17,6 +17,8 @@ public class DocumentUploadReq implements Serializable {
|
|||
|
||||
private String datasetId;
|
||||
|
||||
private Boolean deepAnalysis;
|
||||
|
||||
@JsonProperty("original_document_id")
|
||||
@JsonAlias({"originalDocumentId"})
|
||||
private String originalDocumentId;
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
package com.bjtds.brichat.entity.dto;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
/**
|
||||
* PDF转换API响应实体类
|
||||
*/
|
||||
public class PdfConversionResponse {
|
||||
|
||||
@JsonProperty("task_id")
|
||||
private String taskId;
|
||||
|
||||
public PdfConversionResponse() {
|
||||
}
|
||||
|
||||
public PdfConversionResponse(String taskId) {
|
||||
this.taskId = taskId;
|
||||
}
|
||||
|
||||
public String getTaskId() {
|
||||
return taskId;
|
||||
}
|
||||
|
||||
public void setTaskId(String taskId) {
|
||||
this.taskId = taskId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "PdfConversionResponse{" +
|
||||
"taskId='" + taskId + '\'' +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
package com.bjtds.brichat.entity.dto;
|
||||
|
||||
import com.bjtds.brichat.entity.dataset.DocumentUploadReq;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
|
||||
* PDF转换任务信息DTO
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class PdfTaskDto {
|
||||
|
||||
/**
|
||||
* 文件名称
|
||||
*/
|
||||
private String name;
|
||||
|
||||
/**
|
||||
* 任务ID
|
||||
*/
|
||||
@JsonProperty("task_id")
|
||||
private String taskId;
|
||||
|
||||
/**
|
||||
* 解析百分比
|
||||
*/
|
||||
private Double percent;
|
||||
|
||||
/**
|
||||
* 数据集ID
|
||||
*/
|
||||
private String datasetId;
|
||||
|
||||
private String datasetName;
|
||||
|
||||
/**
|
||||
* 创建时间
|
||||
*/
|
||||
private Long createTime;
|
||||
|
||||
/**
|
||||
* 上传请求参数
|
||||
*/
|
||||
private DocumentUploadReq uploadReq;
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "PdfTaskDto{" +
|
||||
"name='" + name + '\'' +
|
||||
", taskId='" + taskId + '\'' +
|
||||
", percent=" + percent +
|
||||
", datasetId='" + datasetId + '\'' +
|
||||
", createTime=" + createTime +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,155 @@
|
|||
package com.bjtds.brichat.entity.dto;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
/**
|
||||
* PDF任务状态查询响应实体类
|
||||
*/
|
||||
public class PdfTaskStatusResponse {
|
||||
|
||||
@JsonProperty("task_id")
|
||||
private String taskId;
|
||||
|
||||
/**
|
||||
* 任务状态:PENDING/SUCCESS/FAILURE/STARTED
|
||||
*/
|
||||
private String status;
|
||||
|
||||
/**
|
||||
* 转换结果(仅SUCCESS时返回)
|
||||
*/
|
||||
private String result;
|
||||
|
||||
/**
|
||||
* 进度信息
|
||||
*/
|
||||
private Progress progress;
|
||||
|
||||
/**
|
||||
* 错误信息(仅FAILURE时返回)
|
||||
*/
|
||||
private Object error;
|
||||
|
||||
/**
|
||||
* 进度信息内部类
|
||||
*/
|
||||
public static class Progress {
|
||||
@JsonProperty("total_pages")
|
||||
private Integer totalPages;
|
||||
|
||||
@JsonProperty("success_pages")
|
||||
private Integer successPages;
|
||||
|
||||
@JsonProperty("failed_pages")
|
||||
private Integer failedPages;
|
||||
|
||||
@JsonProperty("failed_page_numbers")
|
||||
private Object[] failedPageNumbers;
|
||||
|
||||
public Integer getTotalPages() {
|
||||
return totalPages;
|
||||
}
|
||||
|
||||
public void setTotalPages(Integer totalPages) {
|
||||
this.totalPages = totalPages;
|
||||
}
|
||||
|
||||
public Integer getSuccessPages() {
|
||||
return successPages;
|
||||
}
|
||||
|
||||
public void setSuccessPages(Integer successPages) {
|
||||
this.successPages = successPages;
|
||||
}
|
||||
|
||||
public Integer getFailedPages() {
|
||||
return failedPages;
|
||||
}
|
||||
|
||||
public void setFailedPages(Integer failedPages) {
|
||||
this.failedPages = failedPages;
|
||||
}
|
||||
|
||||
public Object[] getFailedPageNumbers() {
|
||||
return failedPageNumbers;
|
||||
}
|
||||
|
||||
public void setFailedPageNumbers(Object[] failedPageNumbers) {
|
||||
this.failedPageNumbers = failedPageNumbers;
|
||||
}
|
||||
|
||||
/**
|
||||
* 计算转换百分比
|
||||
*/
|
||||
public Double getPercent() {
|
||||
if (totalPages == null || totalPages == 0) {
|
||||
return 0.0;
|
||||
}
|
||||
return (successPages != null ? successPages : 0) * 100.0 / totalPages;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Progress{" +
|
||||
"totalPages=" + totalPages +
|
||||
", successPages=" + successPages +
|
||||
", failedPages=" + failedPages +
|
||||
", percent=" + getPercent() + "%" +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
||||
public PdfTaskStatusResponse() {
|
||||
}
|
||||
|
||||
public String getTaskId() {
|
||||
return taskId;
|
||||
}
|
||||
|
||||
public void setTaskId(String taskId) {
|
||||
this.taskId = taskId;
|
||||
}
|
||||
|
||||
public String getStatus() {
|
||||
return status;
|
||||
}
|
||||
|
||||
public void setStatus(String status) {
|
||||
this.status = status;
|
||||
}
|
||||
|
||||
public String getResult() {
|
||||
return result;
|
||||
}
|
||||
|
||||
public void setResult(String result) {
|
||||
this.result = result;
|
||||
}
|
||||
|
||||
public Progress getProgress() {
|
||||
return progress;
|
||||
}
|
||||
|
||||
public void setProgress(Progress progress) {
|
||||
this.progress = progress;
|
||||
}
|
||||
|
||||
public Object getError() {
|
||||
return error;
|
||||
}
|
||||
|
||||
public void setError(Object error) {
|
||||
this.error = error;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "PdfTaskStatusResponse{" +
|
||||
"taskId='" + taskId + '\'' +
|
||||
", status='" + status + '\'' +
|
||||
", progress=" + progress +
|
||||
", hasResult=" + (result != null && !result.isEmpty()) +
|
||||
", hasError=" + (error != null) +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -4,4 +4,6 @@ import com.bjtds.brichat.entity.dataset.DatasetsDocRenameReq;
|
|||
|
||||
public interface DatasetsDocService {
|
||||
void renameFile(DatasetsDocRenameReq request);
|
||||
|
||||
String getUploadById(String id);
|
||||
}
|
||||
|
|
|
@ -4,45 +4,76 @@ import com.bjtds.brichat.entity.dataset.DocumentUploadReq;
|
|||
import com.bjtds.brichat.entity.dataset.RetrievalModel;
|
||||
import com.bjtds.brichat.entity.dify.DatasetDto;
|
||||
import com.bjtds.brichat.entity.dify.DifyDatasetResponse;
|
||||
import com.bjtds.brichat.entity.dto.PdfConversionResponse;
|
||||
import com.bjtds.brichat.entity.dto.PdfTaskDto;
|
||||
import com.bjtds.brichat.service.dify.DifyDatasetApiService;
|
||||
import com.bjtds.brichat.util.Constants;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import io.github.guoshiqiufeng.dify.dataset.DifyDataset;
|
||||
import io.github.guoshiqiufeng.dify.dataset.dto.request.DatasetInfoRequest;
|
||||
import io.github.guoshiqiufeng.dify.dataset.dto.response.DatasetInfoResponse;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.core.ParameterizedTypeReference;
|
||||
import org.springframework.core.io.ByteArrayResource;
|
||||
import org.springframework.data.redis.core.RedisTemplate;
|
||||
import org.springframework.http.*;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.LinkedMultiValueMap;
|
||||
import org.springframework.util.MultiValueMap;
|
||||
import org.springframework.util.StringUtils;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Service
|
||||
public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(DifyDatasetApiServiceImpl.class);
|
||||
private static final String PDF_TASK_REDIS_KEY = "pdf:conversion:tasks";
|
||||
|
||||
@Autowired
|
||||
private RestTemplate restTemplate;
|
||||
|
||||
@Autowired
|
||||
@Qualifier("redisTemplate")
|
||||
private RedisTemplate<String, Object> redisTemplate;
|
||||
|
||||
//开源组件
|
||||
@Resource
|
||||
private DifyDataset difyDatasetService;
|
||||
|
||||
// @Value("${bjtds.difyDatasets.datasetPath}")
|
||||
// @Value("${dify.url}")
|
||||
// private String datasetPat;
|
||||
@Value("${dify.url}")
|
||||
private String difyUrl;
|
||||
|
||||
|
||||
@Value("${dify.dataset.api-key}")
|
||||
private String apiKey;
|
||||
|
||||
@Value("${pdf.conversion.service.url}")
|
||||
private String pdfConversionServiceUrl;
|
||||
|
||||
@Value("${pdf.conversion.service.api-url}")
|
||||
private String pdfConversionApiUrl;
|
||||
|
||||
@Value("${pdf.conversion.service.model-name}")
|
||||
private String pdfConversionModelName;
|
||||
|
||||
@Value("${pdf.conversion.service.max-workers}")
|
||||
private Integer pdfConversionMaxWorkers;
|
||||
|
||||
@Override
|
||||
public ResponseEntity<DatasetDto> createDataset(String name, String description) {
|
||||
// 1. 设置请求URL
|
||||
|
@ -98,6 +129,23 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
|||
|
||||
}
|
||||
|
||||
/***
|
||||
*
|
||||
*
|
||||
*1 若文件为类型pdf
|
||||
* 1.1远程调用接口将pdf文件解析并转换markdown文件,获取回调任务id
|
||||
* 2.1将回调的任务id放入redis队列中
|
||||
* redis中存储的是一个对象数组,对象包含 name,taskId,percent(解析百分比)
|
||||
* 3.1定时任务读取任务id的状态,若已解析完毕上传md文件至dify知识库中(在其他类中进行)
|
||||
*
|
||||
*2 若文件类型为其他类型,正常执行上传逻辑
|
||||
*
|
||||
*
|
||||
* @param request
|
||||
* @param file
|
||||
* @return
|
||||
* @throws JsonProcessingException
|
||||
*/
|
||||
@Override
|
||||
public ResponseEntity<Map> createDocumentByFile(DocumentUploadReq request, MultipartFile file) throws JsonProcessingException {
|
||||
// 参数验证
|
||||
|
@ -109,6 +157,81 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
|||
throw new IllegalArgumentException("上传文件不能为空");
|
||||
}
|
||||
|
||||
// 获取文件名和扩展名
|
||||
String originalFilename = file.getOriginalFilename();
|
||||
if (originalFilename == null) {
|
||||
throw new IllegalArgumentException("文件名不能为空");
|
||||
}
|
||||
|
||||
String fileExtension = getFileExtension(originalFilename).toLowerCase();
|
||||
logger.info("上传文件: {}, 扩展名: {}", originalFilename, fileExtension);
|
||||
|
||||
// 1. 需深度解析,提取图片中的文本
|
||||
if (request.getDeepAnalysis()) {
|
||||
return handlePdfFile(request, file);
|
||||
} else {
|
||||
// 2.
|
||||
return handleNormalFile(request, file);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理PDF文件
|
||||
* 1.1 远程调用接口将pdf文件解析并转换markdown文件,获取回调任务id
|
||||
* 2.1 将回调的任务id放入redis队列中
|
||||
*/
|
||||
private ResponseEntity<Map> handlePdfFile(DocumentUploadReq request, MultipartFile file) throws JsonProcessingException {
|
||||
try {
|
||||
// 1.1 调用PDF转换服务
|
||||
String taskId = callPdfConversionService(file);
|
||||
|
||||
// 2.1 将任务信息存入Redis
|
||||
// PdfTaskDto pdfTask = new PdfTaskDto(
|
||||
// file.getOriginalFilename(),
|
||||
// taskId,
|
||||
// 0.0, // 初始化进度为0%
|
||||
// request.getDatasetId()
|
||||
// );
|
||||
DatasetInfoRequest datasetInfoRequest = new DatasetInfoRequest();
|
||||
datasetInfoRequest.setDatasetId(request.getDatasetId());
|
||||
|
||||
DatasetInfoResponse datasetInfo = difyDatasetService.info(datasetInfoRequest);
|
||||
String datasetName = datasetInfo.getName();
|
||||
|
||||
request.setDeepAnalysis(false);
|
||||
|
||||
PdfTaskDto pdfTask = PdfTaskDto.builder()
|
||||
.name(file.getOriginalFilename())
|
||||
.taskId(taskId)
|
||||
.percent(0.0)
|
||||
.datasetId(request.getDatasetId())
|
||||
.datasetName(datasetName)
|
||||
.uploadReq(request)
|
||||
.build();
|
||||
|
||||
storePdfTaskToRedis(pdfTask);
|
||||
|
||||
logger.info("PDF转换任务已提交,任务ID: {}, 文件名: {}", taskId, file.getOriginalFilename());
|
||||
|
||||
// 返回成功响应
|
||||
Map<String, Object> response = new HashMap<>();
|
||||
response.put("success", true);
|
||||
response.put("message", "PDF文件已提交转换,任务ID: " + taskId);
|
||||
response.put("task_id", taskId);
|
||||
response.put("file_name", file.getOriginalFilename());
|
||||
|
||||
return ResponseEntity.ok(response);
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("PDF文件处理失败: {}", e.getMessage(), e);
|
||||
throw new RuntimeException("PDF文件处理失败: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理非PDF文件,执行正常上传逻辑
|
||||
*/
|
||||
private ResponseEntity<Map> handleNormalFile(DocumentUploadReq request, MultipartFile file) throws JsonProcessingException {
|
||||
String url = difyUrl + Constants.DATABASE_API + "/{dataset_id}/document/create-by-file";
|
||||
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
|
@ -125,7 +248,6 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
|||
// 创建临时请求对象,不包含datasetId(datasetId用于URL路径参数)
|
||||
DocumentUploadReq dataRequest = new DocumentUploadReq();
|
||||
dataRequest.setIndexingTechnique(request.getIndexingTechnique());
|
||||
|
||||
dataRequest.setProcessRule(request.getProcessRule());
|
||||
|
||||
//设置检索模式(默认混合检索)
|
||||
|
@ -178,5 +300,92 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 调用PDF转换服务
|
||||
*/
|
||||
private String callPdfConversionService(MultipartFile file) {
|
||||
String url = pdfConversionServiceUrl + "/v1/pdf2md";
|
||||
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.MULTIPART_FORM_DATA);
|
||||
|
||||
MultiValueMap<String, Object> body = new LinkedMultiValueMap<>();
|
||||
|
||||
// 添加PDF文件
|
||||
try {
|
||||
body.add("pdf_file", new ByteArrayResource(file.getBytes()) {
|
||||
@Override
|
||||
public String getFilename() {
|
||||
return file.getOriginalFilename();
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("PDF文件读取失败: " + e.getMessage(), e);
|
||||
}
|
||||
|
||||
// 添加可选参数
|
||||
body.add("api_url", pdfConversionApiUrl);
|
||||
body.add("model_name", pdfConversionModelName);
|
||||
body.add("max_workers", pdfConversionMaxWorkers.toString());
|
||||
|
||||
HttpEntity<MultiValueMap<String, Object>> requestEntity = new HttpEntity<>(body, headers);
|
||||
|
||||
try {
|
||||
ResponseEntity<PdfConversionResponse> response = restTemplate.exchange(
|
||||
url,
|
||||
HttpMethod.POST,
|
||||
requestEntity,
|
||||
PdfConversionResponse.class
|
||||
);
|
||||
|
||||
if (response.getBody() == null || !StringUtils.hasText(response.getBody().getTaskId())) {
|
||||
throw new RuntimeException("PDF转换服务返回的任务ID为空");
|
||||
}
|
||||
|
||||
return response.getBody().getTaskId();
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("调用PDF转换服务失败: {}", e.getMessage(), e);
|
||||
throw new RuntimeException("调用PDF转换服务失败: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 将PDF任务信息存储到Redis
|
||||
*/
|
||||
private void storePdfTaskToRedis(PdfTaskDto pdfTask) {
|
||||
try {
|
||||
// 使用Hash结构存储,key为任务ID,value为任务信息
|
||||
String hashKey = PDF_TASK_REDIS_KEY + ":" + pdfTask.getTaskId();
|
||||
redisTemplate.opsForHash().put(hashKey, "taskInfo", pdfTask);
|
||||
|
||||
// 设置过期时间为24小时
|
||||
redisTemplate.expire(hashKey, 24, TimeUnit.HOURS);
|
||||
|
||||
// 同时将任务ID加入到任务列表中,便于定时任务扫描
|
||||
redisTemplate.opsForList().rightPush(PDF_TASK_REDIS_KEY + ":list", pdfTask.getTaskId());
|
||||
|
||||
logger.info("PDF任务信息已存储到Redis: {}", pdfTask);
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("存储PDF任务信息到Redis失败: {}", e.getMessage(), e);
|
||||
throw new RuntimeException("存储PDF任务信息到Redis失败: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取文件扩展名
|
||||
*/
|
||||
private String getFileExtension(String filename) {
|
||||
if (filename == null || filename.isEmpty()) {
|
||||
return "";
|
||||
}
|
||||
int lastDotIndex = filename.lastIndexOf('.');
|
||||
if (lastDotIndex == -1 || lastDotIndex == filename.length() - 1) {
|
||||
return "";
|
||||
}
|
||||
return filename.substring(lastDotIndex + 1);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
package com.bjtds.brichat.service.impl;
|
||||
|
||||
import cn.hutool.json.JSONUtil;
|
||||
import com.bjtds.brichat.entity.dataset.DatasetsDocRenameReq;
|
||||
import com.bjtds.brichat.mapper.postgresql.DifyDatasetsDocMapper;
|
||||
import com.bjtds.brichat.service.DatasetsDocService;
|
||||
|
@ -25,4 +26,11 @@ public class DatasetsDocServiceImpl implements DatasetsDocService {
|
|||
throw new RuntimeException("文档未找到或更新失败");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploadById(String id) {
|
||||
String dataSourceById = difyDatasetsDocMapper.getDataSourceById(id);
|
||||
String uploadId = JSONUtil.parseObj(dataSourceById).getStr("upload_file_id");
|
||||
return uploadId;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,14 +10,11 @@ import com.bjtds.brichat.entity.dify.DifyUploadFile;
|
|||
import com.bjtds.brichat.entity.dto.FilePreviewDto;
|
||||
import com.bjtds.brichat.mapper.postgresql.DifyUploadFileMapper;
|
||||
import com.bjtds.brichat.service.ChatMessageService;
|
||||
import com.bjtds.brichat.service.DocumentService;
|
||||
import com.bjtds.brichat.service.DatasetsDocService;
|
||||
import com.bjtds.brichat.service.FileService;
|
||||
import com.bjtds.brichat.util.Constants;
|
||||
import com.bjtds.brichat.util.FileUploadUtil;
|
||||
import com.bjtds.brichat.util.PdfUtils;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import io.github.guoshiqiufeng.dify.dataset.DifyDataset;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.ArrayUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
@ -36,7 +33,6 @@ import java.io.*;
|
|||
import java.nio.file.Files;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
|
@ -73,7 +69,7 @@ public class FileServiceImpl implements FileService {
|
|||
private DifyUploadFileMapper difyUploadFileMapper;
|
||||
|
||||
@Resource
|
||||
private DocumentService documentService;
|
||||
private DatasetsDocService datasetsDocService;
|
||||
|
||||
// @Override
|
||||
// public String fileUpload(MultipartFile multiFile, String filePath) {
|
||||
|
@ -384,7 +380,7 @@ public class FileServiceImpl implements FileService {
|
|||
String sysMessageId = (String)traceJson.get("messageId");
|
||||
List<TraceFile> traceFiles = new ArrayList<>();
|
||||
for( String key : tracePdf.keySet()){
|
||||
String uploadId = documentService.getUploadById(key);
|
||||
String uploadId = datasetsDocService.getUploadById(key);
|
||||
//获取文件上传地址
|
||||
// /var/www/dify/storage + "/" + upload_files/1f93db68-fb8c-4d58-bff2-3e91d72757cf/9b12ca0d-dd5a-4d4e-9d3f-946227ae4e84.xlsx
|
||||
DifyUploadFile uploadPath = difyUploadFileMapper.getFileById(uploadId);
|
||||
|
|
|
@ -0,0 +1,408 @@
|
|||
package com.bjtds.brichat.service.task;
|
||||
|
||||
import com.bjtds.brichat.entity.dataset.DocumentUploadReq;
|
||||
import com.bjtds.brichat.entity.dataset.RetrievalModel;
|
||||
import com.bjtds.brichat.entity.dto.PdfTaskDto;
|
||||
import com.bjtds.brichat.entity.dto.PdfTaskStatusResponse;
|
||||
import com.bjtds.brichat.service.dify.DifyDatasetApiService;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.data.redis.core.RedisTemplate;
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpMethod;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.StringUtils;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* PDF转换任务定时服务
|
||||
*/
|
||||
@Service
|
||||
public class PdfConversionTaskService {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(PdfConversionTaskService.class);
|
||||
private static final String PDF_TASK_REDIS_KEY = "pdf:conversion:tasks";
|
||||
|
||||
@Autowired
|
||||
private RestTemplate restTemplate;
|
||||
|
||||
@Autowired
|
||||
@Qualifier("redisTemplate")
|
||||
private RedisTemplate<String, Object> redisTemplate;
|
||||
|
||||
@Autowired
|
||||
private DifyDatasetApiService difyDatasetApiService;
|
||||
|
||||
@Value("${pdf.conversion.service.url}")
|
||||
private String pdfConversionServiceUrl;
|
||||
|
||||
/**
|
||||
* 定时任务:每10秒检查一次PDF转换任务状态
|
||||
*/
|
||||
@Scheduled(fixedRate = 10000) // 10秒执行一次
|
||||
public void checkPdfConversionTasks() {
|
||||
try {
|
||||
// 获取所有待处理的任务ID
|
||||
List<Object> taskIds = redisTemplate.opsForList().range(PDF_TASK_REDIS_KEY + ":list", 0, -1);
|
||||
|
||||
if (taskIds == null || taskIds.isEmpty()) {
|
||||
logger.debug("没有待处理的PDF转换任务");
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info("开始检查PDF转换任务状态,共{}个任务", taskIds.size());
|
||||
|
||||
for (Object taskIdObj : taskIds) {
|
||||
String taskId = taskIdObj.toString();
|
||||
try {
|
||||
checkSingleTask(taskId);
|
||||
} catch (Exception e) {
|
||||
logger.error("检查任务{}状态时发生错误: {}", taskId, e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("定时检查PDF转换任务时发生错误: {}", e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查单个任务的状态
|
||||
*/
|
||||
private void checkSingleTask(String taskId) {
|
||||
try {
|
||||
// 从Redis获取任务信息
|
||||
String hashKey = PDF_TASK_REDIS_KEY + ":" + taskId;
|
||||
PdfTaskDto taskInfo = (PdfTaskDto) redisTemplate.opsForHash().get(hashKey, "taskInfo");
|
||||
|
||||
if (taskInfo == null) {
|
||||
logger.warn("任务{}的信息在Redis中不存在,从队列中移除", taskId);
|
||||
removeTaskFromQueue(taskId);
|
||||
return;
|
||||
}
|
||||
|
||||
// 调用状态查询接口
|
||||
PdfTaskStatusResponse statusResponse = queryTaskStatus(taskId);
|
||||
|
||||
if (statusResponse == null) {
|
||||
logger.warn("无法获取任务{}的状态信息", taskId);
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info("任务{}状态: {}, 进度: {}", taskId, statusResponse.getStatus(),
|
||||
statusResponse.getProgress() != null ? statusResponse.getProgress().toString() : "无进度信息");
|
||||
|
||||
// 根据状态处理任务
|
||||
switch (statusResponse.getStatus()) {
|
||||
case "SUCCESS":
|
||||
handleSuccessTask(taskInfo, statusResponse);
|
||||
break;
|
||||
case "FAILURE":
|
||||
handleFailedTask(taskInfo, statusResponse);
|
||||
break;
|
||||
case "STARTED":
|
||||
case "PENDING":
|
||||
updateTaskProgress(taskInfo, statusResponse);
|
||||
break;
|
||||
default:
|
||||
logger.warn("任务{}状态未知: {}", taskId, statusResponse.getStatus());
|
||||
break;
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("检查任务{}时发生错误: {}", taskId, e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 查询任务状态
|
||||
*/
|
||||
private PdfTaskStatusResponse queryTaskStatus(String taskId) {
|
||||
String url = pdfConversionServiceUrl + "/v1/pdf2md/status/" + taskId;
|
||||
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
HttpEntity<?> requestEntity = new HttpEntity<>(headers);
|
||||
|
||||
try {
|
||||
ResponseEntity<PdfTaskStatusResponse> response = restTemplate.exchange(
|
||||
url,
|
||||
HttpMethod.GET,
|
||||
requestEntity,
|
||||
PdfTaskStatusResponse.class
|
||||
);
|
||||
|
||||
return response.getBody();
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("查询任务{}状态失败: {}", taskId, e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理成功完成的任务
|
||||
*/
|
||||
private void handleSuccessTask(PdfTaskDto taskInfo, PdfTaskStatusResponse statusResponse) {
|
||||
String taskId = taskInfo.getTaskId();
|
||||
String result = statusResponse.getResult();
|
||||
|
||||
logger.info("=== PDF转换任务完成 ===");
|
||||
logger.info("任务ID: {}", taskId);
|
||||
logger.info("文件名: {}", taskInfo.getName());
|
||||
logger.info("数据集ID: {}", taskInfo.getDatasetId());
|
||||
|
||||
if (statusResponse.getProgress() != null) {
|
||||
logger.info("转换进度: {}", statusResponse.getProgress().toString());
|
||||
}
|
||||
|
||||
if (StringUtils.hasText(result)) {
|
||||
logger.info("=== 转换结果开始 ===");
|
||||
logger.info("MD文件路径: {}", result);
|
||||
logger.info("=== 转换结果结束 ===");
|
||||
|
||||
// 上传MD文件到Dify知识库
|
||||
try {
|
||||
uploadMdFileToDifyDataset(taskInfo, result);
|
||||
logger.info("MD文件已成功上传到知识库");
|
||||
} catch (Exception e) {
|
||||
logger.error("上传MD文件到知识库失败: {}", e.getMessage(), e);
|
||||
}
|
||||
} else {
|
||||
logger.warn("任务{}转换完成但结果为空", taskId);
|
||||
}
|
||||
|
||||
// 从队列中移除任务
|
||||
removeTaskFromQueue(taskId);
|
||||
logger.info("任务{}已完成并从队列中移除", taskId);
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理失败的任务
|
||||
*/
|
||||
private void handleFailedTask(PdfTaskDto taskInfo, PdfTaskStatusResponse statusResponse) {
|
||||
String taskId = taskInfo.getTaskId();
|
||||
|
||||
logger.error("=== PDF转换任务失败 ===");
|
||||
logger.error("任务ID: {}", taskId);
|
||||
logger.error("文件名: {}", taskInfo.getName());
|
||||
logger.error("数据集ID: {}", taskInfo.getDatasetId());
|
||||
|
||||
if (statusResponse.getError() != null) {
|
||||
logger.error("错误信息: {}", statusResponse.getError());
|
||||
}
|
||||
|
||||
// 从队列中移除失败的任务
|
||||
removeTaskFromQueue(taskId);
|
||||
logger.info("失败任务{}已从队列中移除", taskId);
|
||||
}
|
||||
|
||||
/**
|
||||
* 更新任务进度
|
||||
*/
|
||||
private void updateTaskProgress(PdfTaskDto taskInfo, PdfTaskStatusResponse statusResponse) {
|
||||
String taskId = taskInfo.getTaskId();
|
||||
|
||||
// 更新任务进度
|
||||
if (statusResponse.getProgress() != null) {
|
||||
Double newPercent = statusResponse.getProgress().getPercent();
|
||||
taskInfo.setPercent(newPercent);
|
||||
|
||||
// 更新Redis中的任务信息
|
||||
String hashKey = PDF_TASK_REDIS_KEY + ":" + taskId;
|
||||
redisTemplate.opsForHash().put(hashKey, "taskInfo", taskInfo);
|
||||
|
||||
logger.debug("任务{}进度更新: {}%", taskId, String.format("%.1f", newPercent));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从队列中移除任务
|
||||
*/
|
||||
private void removeTaskFromQueue(String taskId) {
|
||||
try {
|
||||
// 从任务列表中移除
|
||||
redisTemplate.opsForList().remove(PDF_TASK_REDIS_KEY + ":list", 1, taskId);
|
||||
|
||||
// 删除任务详细信息
|
||||
String hashKey = PDF_TASK_REDIS_KEY + ":" + taskId;
|
||||
redisTemplate.delete(hashKey);
|
||||
|
||||
logger.debug("任务{}已从Redis队列中移除", taskId);
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("移除任务{}时发生错误: {}", taskId, e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 上传MD文件到Dify知识库
|
||||
*/
|
||||
private void uploadMdFileToDifyDataset(PdfTaskDto taskInfo, String mdFilePath) throws Exception {
|
||||
logger.info("开始上传MD文件到知识库: filePath={}, datasetId={}", mdFilePath, taskInfo.getDatasetId());
|
||||
|
||||
// 1. 验证文件是否存在
|
||||
Path path = Paths.get(mdFilePath);
|
||||
if (!Files.exists(path)) {
|
||||
throw new RuntimeException("MD文件不存在: " + mdFilePath);
|
||||
}
|
||||
|
||||
// 2. 创建MultipartFile
|
||||
MultipartFile multipartFile = createMultipartFileFromPath(path, taskInfo.getName());
|
||||
//修改文件名
|
||||
// multipartFile = new SimpleMultipartFile(
|
||||
// multipartFile.getName(),
|
||||
// taskInfo.getName() + ".md",
|
||||
// multipartFile.getContentType(),
|
||||
// multipartFile.getBytes()
|
||||
// );
|
||||
|
||||
|
||||
// 3. 调用上传接口
|
||||
ResponseEntity<Map> response = difyDatasetApiService.createDocumentByFile(taskInfo.getUploadReq(), multipartFile);
|
||||
|
||||
if (response.getStatusCode().is2xxSuccessful()) {
|
||||
logger.info("MD文件上传成功: taskId={}, fileName={}, response={}",
|
||||
taskInfo.getTaskId(), taskInfo.getName(), response.getBody());
|
||||
} else {
|
||||
throw new RuntimeException("MD文件上传失败,HTTP状态码: " + response.getStatusCode());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从文件路径创建MultipartFile (JDK 1.8兼容版本)
|
||||
*/
|
||||
private MultipartFile createMultipartFileFromPath(Path filePath, String fileName) throws IOException {
|
||||
File file = filePath.toFile();
|
||||
String originalFileName = fileName.endsWith(".md") ? fileName : fileName + ".md";
|
||||
|
||||
// JDK 1.8兼容的文件读取方式
|
||||
byte[] content = readFileToByteArray(file);
|
||||
|
||||
return new SimpleMultipartFile(
|
||||
"file",
|
||||
originalFileName,
|
||||
"text/markdown",
|
||||
content
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* JDK 1.8兼容的文件读取方法
|
||||
*/
|
||||
private byte[] readFileToByteArray(File file) throws IOException {
|
||||
try (FileInputStream fis = new FileInputStream(file);
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
|
||||
byte[] buffer = new byte[8192];
|
||||
int bytesRead;
|
||||
while ((bytesRead = fis.read(buffer)) != -1) {
|
||||
baos.write(buffer, 0, bytesRead);
|
||||
}
|
||||
return baos.toByteArray();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 简单的MultipartFile实现类 (JDK 1.8兼容)
|
||||
*/
|
||||
private static class SimpleMultipartFile implements MultipartFile {
|
||||
private final String name;
|
||||
private final String originalFilename;
|
||||
private final String contentType;
|
||||
private final byte[] content;
|
||||
|
||||
public SimpleMultipartFile(String name, String originalFilename, String contentType, byte[] content) {
|
||||
this.name = name;
|
||||
this.originalFilename = originalFilename;
|
||||
this.contentType = contentType;
|
||||
this.content = content;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getOriginalFilename() {
|
||||
return originalFilename;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getContentType() {
|
||||
return contentType;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return content.length == 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSize() {
|
||||
return content.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getBytes() throws IOException {
|
||||
return content;
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputStream getInputStream() throws IOException {
|
||||
return new ByteArrayInputStream(content);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transferTo(File dest) throws IOException, IllegalStateException {
|
||||
try (FileOutputStream fos = new FileOutputStream(dest)) {
|
||||
fos.write(content);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建文档上传请求
|
||||
*/
|
||||
private DocumentUploadReq createDocumentUploadRequest(String datasetId) {
|
||||
DocumentUploadReq request = new DocumentUploadReq();
|
||||
request.setDatasetId(datasetId);
|
||||
request.setDeepAnalysis(false); // MD文件不需要深度解析
|
||||
request.setIndexingTechnique("high_quality");
|
||||
request.setDocLanguage("chinese");
|
||||
|
||||
// 设置处理规则
|
||||
DocumentUploadReq.ProcessRule processRule = new DocumentUploadReq.ProcessRule();
|
||||
processRule.setMode("automatic");
|
||||
request.setProcessRule(processRule);
|
||||
|
||||
// 设置检索模式(默认混合检索)
|
||||
RetrievalModel retrievalModel = new RetrievalModel();
|
||||
retrievalModel.setSearchMethod("hybrid_search");
|
||||
retrievalModel.setRerankingEnable(true);
|
||||
RetrievalModel.RerankingModel rerankingModel = new RetrievalModel.RerankingModel();
|
||||
rerankingModel.setRerankingModelName("bge-reanker-v2-m3");
|
||||
rerankingModel.setRerankingProviderName("langgenius/huggingface_tei/huggingface_tei");
|
||||
retrievalModel.setTopK(3);
|
||||
retrievalModel.setRerankingModel(rerankingModel);
|
||||
retrievalModel.setScoreThresholdEnabled(false);
|
||||
retrievalModel.setScoreThreshold(0.5f);
|
||||
request.setRetrievalModel(retrievalModel);
|
||||
|
||||
return request;
|
||||
}
|
||||
}
|
|
@ -16,4 +16,7 @@ public class Constants {
|
|||
public static final String BEARER = "Bearer ";
|
||||
|
||||
public static final String DATABASE_API = "/v1/datasets";
|
||||
|
||||
|
||||
public static final String PDF_TASK_REDIS_KEY = "pdf:conversion:tasks";
|
||||
}
|
||||
|
|
|
@ -48,4 +48,13 @@ dify:
|
|||
email: bjtds@bjtds.com # 请替换为实际的 Dify 服务邮箱,若不需要调用 server相关接口可不填
|
||||
password: 123456Aa # 请替换为实际的 Dify 服务密码,若不需要调用 server相关接口可不填
|
||||
dataset:
|
||||
api-key: ${dify-dataset-api-key:dataset-zVa4uJBUem96P19o8iBtyihQ} # 请替换为实际的知识库api-key, 若不需要调用知识库可不填
|
||||
api-key: ${dify-dataset-api-key:dataset-zVa4uJBUem96P19o8iBtyihQ} # 请替换为实际的知识库api-key, 若不需要调用知识库可不填
|
||||
|
||||
# PDF转换服务配置
|
||||
pdf:
|
||||
conversion:
|
||||
service:
|
||||
url: ${pdf-conversion-url:http://192.168.1.211:12201} # PDF转换服务地址
|
||||
api-url: ${pdf-conversion-api-url:http://192.168.1.211:1050/v1/chat/completions} # QwenVL API服务地址
|
||||
model-name: ${pdf-conversion-model:qwenvl} # 使用的模型名称
|
||||
max-workers: ${pdf-conversion-max-workers:10} # 并发线程数
|
||||
|
|
|
@ -48,4 +48,13 @@ dify:
|
|||
email: bjtds@bjtds.com # 请替换为实际的 Dify 服务邮箱,若不需要调用 server相关接口可不填
|
||||
password: 123456Aa # 请替换为实际的 Dify 服务密码,若不需要调用 server相关接口可不填
|
||||
dataset:
|
||||
api-key: ${dify-dataset-api-key:dataset-0Hij9IwoWYbJe1vvwVh8y7DS} # 请替换为实际的知识库api-key, 若不需要调用知识库可不填
|
||||
api-key: ${dify-dataset-api-key:dataset-0Hij9IwoWYbJe1vvwVh8y7DS} # 请替换为实际的知识库api-key, 若不需要调用知识库可不填
|
||||
|
||||
# PDF转换服务配置
|
||||
pdf:
|
||||
conversion:
|
||||
service:
|
||||
url: ${pdf-conversion-url:http://192.168.8.253:12201} # PDF转换服务地址
|
||||
api-url: ${pdf-conversion-api-url:http://192.168.8.253:1050/v1/chat/completions} # QwenVL API服务地址
|
||||
model-name: ${pdf-conversion-model:qwenvl} # 使用的模型名称
|
||||
max-workers: ${pdf-conversion-max-workers:10} # 并发线程数
|
|
@ -18,13 +18,13 @@ spring:
|
|||
# 可选值: wuhan, beijing
|
||||
# 线上部署时可通过环境变量 SPRING_PROFILES_ACTIVE 覆盖
|
||||
profiles:
|
||||
active: beijing
|
||||
active: wuhan
|
||||
|
||||
# 文件上传配置
|
||||
servlet:
|
||||
multipart:
|
||||
max-request-size: 10MB
|
||||
max-file-size: 10MB
|
||||
max-request-size: 100MB
|
||||
max-file-size: 100MB
|
||||
|
||||
# 数据源配置已移至对应的环境配置文件
|
||||
# application-wuhan.yml 和 application-beijing.yml
|
||||
|
|
Loading…
Reference in New Issue