From 6ef06e12a9ade86c26fa8ebe6a2fd674f9d82bea Mon Sep 17 00:00:00 2001 From: wenjinbo <599483010@qq.com> Date: Tue, 29 Jul 2025 15:53:14 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=B7=B1=E5=BA=A6=E8=A7=A3?= =?UTF-8?q?=E6=9E=90=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- chat-client/.env.development | 12 +- chat-client/.env.production | 12 +- chat-client/src/api/dataset.ts | 10 + chat-client/src/locales/en.ts | 91 ++-- chat-client/src/locales/zh.ts | 12 + .../datasets/components/DocumentList.vue | 174 +++++++- .../task/PdfConversionTaskService.java | 402 ++++++++++++++++++ 7 files changed, 642 insertions(+), 71 deletions(-) create mode 100644 chat-server/src/main/java/com/bjtds/brichat/service/task/PdfConversionTaskService.java diff --git a/chat-client/.env.development b/chat-client/.env.development index 9799822..9c24f36 100644 --- a/chat-client/.env.development +++ b/chat-client/.env.development @@ -2,17 +2,17 @@ # 此文件修改后需要重启项目 NODE_ENV=development #VUE_APP_BASE_URL='/vab-mock-server' -VUE_APP_BASE_URL='http://localhost:10001' -VUE_APP_API_BASE_URL='http://localhost:8080' +# VUE_APP_BASE_URL='http://localhost:10001' +# VUE_APP_API_BASE_URL='http://localhost:8080' #北京服务器配置 # VUE_APP_BASE_URL='http://192.168.1.211:80/brichat' # VUE_APP_API_BASE_URL='http://192.168.1.211:80/brichat' # #武汉公司服务器 -# VUE_APP_BASE_URL='http://192.168.8.253:80/brichat' -# VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat' +VUE_APP_BASE_URL='http://192.168.8.253:80/brichat' +VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat' #总部服务器配置 # VUE_APP_BASE_URL='http://192.168.0.33:80/brichat' # VUE_APP_API_BASE_URL='http://192.168.0.33:80/brichat' -VUE_APP_GITHUB_USER_NAME=test -VUE_APP_SECRET_KEY=preview +# VUE_APP_GITHUB_USER_NAME=test +# VUE_APP_SECRET_KEY=preview diff --git a/chat-client/.env.production b/chat-client/.env.production index d6534da..8d5124b 100644 --- a/chat-client/.env.production +++ b/chat-client/.env.production @@ -2,18 +2,18 @@ # 此文件修改后需要重启项目 NODE_ENV=production # VUE_APP_BASE_URL='/vab-mock-server' -VUE_APP_BASE_URL='http://localhost:10001' -VUE_APP_API_BASE_URL='http://localhost:10001' +# VUE_APP_BASE_URL='http://localhost:10001' +# VUE_APP_API_BASE_URL='http://localhost:10001' #北京服务器 # VUE_APP_BASE_URL='http://192.168.1.211:80/brichat' # VUE_APP_API_BASE_URL='http://192.168.1.211:80/brichat' #武汉公司服务器 -# VUE_APP_BASE_URL='http://192.168.8.253:80/brichat' -# VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat' +VUE_APP_BASE_URL='http://192.168.8.253:80/brichat' +VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat' # 总部服务器 # VUE_APP_BASE_URL='http://192.168.0.33:80/brichat' # VUE_APP_API_BASE_URL='http://192.168.0.33:80/brichat' -VUE_APP_GITHUB_USER_NAME=test -VUE_APP_SECRET_KEY=preview \ No newline at end of file +# VUE_APP_GITHUB_USER_NAME=test +# VUE_APP_SECRET_KEY=preview \ No newline at end of file diff --git a/chat-client/src/api/dataset.ts b/chat-client/src/api/dataset.ts index 8bf0248..c172975 100644 --- a/chat-client/src/api/dataset.ts +++ b/chat-client/src/api/dataset.ts @@ -138,4 +138,14 @@ export const deleteDataset = (id: string) => { datasetId } }) + } + + /** + * 获取深度解析任务列表 + */ + export const getDeepAnalysisList = () => { + return request({ + url: '/brichat-service/datasetManage/document/deepAnalysisList', + method: 'get' + }) } \ No newline at end of file diff --git a/chat-client/src/locales/en.ts b/chat-client/src/locales/en.ts index f3967c2..e7c14e5 100644 --- a/chat-client/src/locales/en.ts +++ b/chat-client/src/locales/en.ts @@ -295,6 +295,7 @@ export default { download: 'Download', rename: 'Rename', delete: 'Delete', + deepAnalysis: 'Deep Analysis', }, search: { placeholder: 'Enter keywords to search', @@ -314,6 +315,8 @@ export default { segmentation: 'Segmentation Rules', separatorPlaceholder: 'Separator (default: ###)', maxTokens: 'Max Tokens', + deepAnalysis: 'Deep Analysis', + deepAnalysisOption: 'Enable deep analysis to improve document understanding accuracy', selectFile: 'Select the file', fileTip: 'Multiple selection supported, max 100MB per file. Accepted formats: TXT, MD, MARKDOWN, MDX, PDF, HTML, HTM, XLSX, XLS, DOCX, CSV, VTT, PROPERTIES', @@ -331,6 +334,14 @@ export default { confirm: 'Confirm', deleteconfirm: 'Delete Confirm', }, + deepAnalysisDialog: { + title: 'Deep Analysis Task List', + noTasks: 'No deep analysis tasks are currently in progress', + datasetName: 'Dataset', + taskId: 'Task ID', + createTime: 'Create Time', + close: 'Close', + }, messages: { FilenamecantEmpty: 'Filename cannot be empty', uploadSuccess: 'Upload successfully', @@ -359,6 +370,7 @@ export default { fetchFailed: 'Failed to get document list: ', previewFailed: 'Preview failed: ', downloadFailed: 'Download failed: ', + fetchDeepAnalysisFailed: 'Failed to get deep analysis task list', }, }, }, @@ -515,70 +527,35 @@ export default { Tip: 'Tip', selectDeleteData: 'Please select the data to be deleted', confirmDeleteApiKeys: 'This operation will permanently delete selected ', - confirmDeleteApiKeysEnd: ' API key, are you sure?', - confirmDeleteApiKeysEndPlural: ' API keys, are you sure?', + confirmDeleteApiKeysEnd:' API key, are you sure?', + confirmDeleteApiKeysEndPlural:' API keys, are you sure?', confirmRefreshCache: 'This operation will refresh Redis cache, reloading all API keys, are you sure?', updateSuccess: 'Update Successfully', updateFail: 'Update Failed', addSuccess: 'Add Successfully', - addFail: 'Add Failed', + addFail:'Add Failed' }, - prologue: { - title: 'Opening Statement Management', - query: 'Query', - save: 'Save', + prologue:{ chatType: 'Chat Type', - chatTypes: { - '1': 'Fault Diagnosis', - '2': 'Intelligent Q&A', - '3': 'Chart Report', - '4': 'Emergency Assistant Q&A', - '5': 'Diagnostic Code Lookup', - }, - openingContent: 'Opening Statement Content', - addRecommend: 'Add Recommended Question', - recommendation: 'Recommended Question', - action: 'Action', + selectChatType: 'Select Chat Type', + prologueContent: 'Prologue Content', + addRecommend: 'Add Recommendation', + recommendQuestion: 'Recommended Question', + actions: 'Actions', edit: 'Edit', delete: 'Delete', - dialog: { - addTitle: 'Add Recommended Question', - editTitle: 'Edit Recommended Question', - questionContent: 'Recommended Question', - cancel: 'Cancel', - confirm: 'Confirm', - save: 'Save', - }, - message: { - loadSuccess: 'Loaded successfully', - loadFailed: 'Load failed, please try again later', - loadFailedMess: 'Failed to load recommended questions', - notFound: 'Opening statement not found', - saveSuccess: 'Saved successfully', - saveFailed: 'Save failed, please try again later', - addSuccess: 'Recommended question added successfully', - addFailed: 'Failed to add recommended question', - editSuccess: 'Recommended question edited successfully', - editFailed: 'Failed to edit recommended question, please try again later', - deleteSuccess: 'Deleted successfully', - deleteFailed: 'Deletion failed', - }, - confirm: { - save: 'Are you sure you want to save the changes?', - tips: 'Tips', - delete: 'Are you sure you want to delete this recommended question?', - deleteConfirm: 'Delete Confirmation', - }, - placeholder: { - selectType: 'Select chat type', - }, - }, - vabtabs: { - refresh: 'Refresh', - closeOthers: 'Close Others', - closeLeft: 'Close Left', - closeRight: 'Close Right', - closeAll: 'Close All', - }, + confirmDelete: 'Are you sure you want to delete this recommendation?', + confirmSave: 'Are you sure you want to save the changes?', + deleteSuccess: 'Deleted successfully', + deleteFailure: 'Failed to delete, please try again later', + saveSuccess: 'Saved successfully', + saveFailure: 'Failed to save, please try again later', + loadSuccess: 'Loaded successfully', + loadFailure: 'Failed to load, please try again later', + noContent: 'No content found', + loading: 'Loading...', + updateRecommendation: 'Update Recommendation', + addRecommendation: 'Add Recommendation', + } }, } diff --git a/chat-client/src/locales/zh.ts b/chat-client/src/locales/zh.ts index a417884..36c7cd7 100644 --- a/chat-client/src/locales/zh.ts +++ b/chat-client/src/locales/zh.ts @@ -303,6 +303,7 @@ export default { download: '下载', rename: '重命名', delete: '删除', + deepAnalysis: '深度解析', }, search: { placeholder: '请输入关键词查询', @@ -322,6 +323,8 @@ export default { segmentation: '分段规则', separatorPlaceholder: '分隔符(默认###)', maxTokens: '最大Token数', + deepAnalysis: '深度解析', + deepAnalysisOption: '启用深度解析,提高文档理解精度', selectFile: '选择文件', fileTip: '支持多选,单个文件不超过100MB,可接受格式:TXT、MD、MARKDOWN、MDX、PDF、HTML、HTM、XLSX、XLS、DOCX、CSV、VTT、PROPERTIES', cancel: '取消', @@ -338,6 +341,14 @@ export default { confirm: '确定', deleteconfirm: '删除确认', }, + deepAnalysisDialog: { + title: '深度解析任务列表', + noTasks: '当前没有正在进行的深度解析任务', + datasetName: '知识库', + taskId: '任务ID', + createTime: '创建时间', + close: '关闭', + }, messages: { FilenamecantEmpty: '文件名不能为空', uploadSuccess: '上传成功', @@ -367,6 +378,7 @@ export default { fetchFailed: '获取文档列表失败: ', previewFailed: '预览失败: ', downloadFailed: '下载失败: ', + fetchDeepAnalysisFailed: '获取深度解析任务列表失败', }, }, }, diff --git a/chat-client/src/views/datasets/components/DocumentList.vue b/chat-client/src/views/datasets/components/DocumentList.vue index a99d9e3..42a5118 100644 --- a/chat-client/src/views/datasets/components/DocumentList.vue +++ b/chat-client/src/views/datasets/components/DocumentList.vue @@ -37,6 +37,7 @@ {{t('vabI18n.knowledge.document.buttons.batchDelete', { count: selectedRows.length })}} {{t('vabI18n.knowledge.document.buttons.refresh')}} + {{t('vabI18n.knowledge.document.buttons.deepAnalysis')}} {{t('vabI18n.knowledge.document.buttons.upload')}} @@ -186,6 +187,12 @@ + + + {{t('vabI18n.knowledge.document.uploadDialog.deepAnalysisOption')}} + + + {{t('vabI18n.knowledge.document.renameDialog.confirm')}} + + + +
+
+ +
+
+
+
+
+

{{ task.name }}

+

{{t('vabI18n.knowledge.document.deepAnalysisDialog.datasetName')}}: {{ task.datasetName }}

+

{{t('vabI18n.knowledge.document.deepAnalysisDialog.taskId')}}: {{ task.taskId }}

+

{{t('vabI18n.knowledge.document.deepAnalysisDialog.createTime')}}: {{ formatTimestampToLocaleString(task.createTime) }}

+
+
+ +
+
+
+
+
+ +
@@ -274,7 +313,7 @@ import { useRoute } from 'vue-router' import { ref, reactive } from 'vue' import VueOfficePdf from '@vue-office/pdf' -import { getDatasetDocPage, uploadDocument, deleteDocument, downloadDocument, previewDocumentUrl, renameDocument} from '@/api/dataset' +import { getDatasetDocPage, uploadDocument, deleteDocument, downloadDocument, previewDocumentUrl, renameDocument, getDeepAnalysisList } from '@/api/dataset' //引入VueOfficeDocx组件 import VueOfficeDocx from '@vue-office/docx' //引入相关样式 @@ -328,7 +367,8 @@ const uploadForm = reactive({ indexingTechnique: 'high_quality', preProcessingRules: ['remove_extra_spaces', 'remove_urls_emails'], segmentSeparator: '###', - segmentMaxTokens: 500 + segmentMaxTokens: 500, + deepAnalysis: false }) // 重命名相关 @@ -338,6 +378,21 @@ const renameForm = reactive({ newName: '' }) +// 深度解析相关 +const deepAnalysisDialogVisible = ref(false) +const deepAnalysisLoading = ref(false) +const deepAnalysisList = ref([]) + +// 定义深度解析任务类型 +interface PdfTask { + name: string + taskId: string + percent: number + datasetName: string + createTime: number + +} + const getFileTypeIcon = (fileType: string) => { // 使用动态导入获取图标路径 // const getIconUrl = (iconName: string) => { @@ -625,6 +680,7 @@ const handleUpload = async () => { datasetId: datasetId.value, indexingTechnique: uploadForm.indexingTechnique, processRule: processRule, + deepAnalysis: uploadForm.deepAnalysis })], { type: 'application/json' })) @@ -806,6 +862,34 @@ const handleRefresh = () => { fetchDocuments() } +// 深度解析方法 +const handleDeepAnalysis = async () => { + deepAnalysisDialogVisible.value = true + await fetchDeepAnalysisList() +} + +const fetchDeepAnalysisList = async () => { + deepAnalysisLoading.value = true + try { + const { data } = await getDeepAnalysisList() + deepAnalysisList.value = data || [] + } catch (error) { + console.error('获取深度解析任务列表失败:', error) + ElNotification({ + title: t('vabI18n.knowledge.document.errors.fetchDeepAnalysisFailed'), + message: error instanceof Error ? error.message : t('vabI18n.knowledge.document.messages.NoKnowError'), + type: 'error' + }) + } finally { + deepAnalysisLoading.value = false + } +} + +const formatTimestampToLocaleString = (timestamp: number): string => { + const date = new Date(timestamp * 1000) + return date.toLocaleString() +} + // 工具函数 const formatTimestamp = (timestamp: number): string => { const date = new Date(timestamp * 1000) @@ -1249,6 +1333,83 @@ const handleSearch = () => { to { transform: rotate(360deg); } } +// 深度解析对话框样式 +.deep-analysis-dialog { + ::v-deep .el-dialog__header { + background: linear-gradient(135deg, #409eff 0%, #67c23a 100%); + color: white; + border-radius: 8px 8px 0 0; + } + + ::v-deep .el-dialog__title { + color: white; + font-weight: 600; + } +} + +.empty-state { + padding: 40px 20px; + text-align: center; +} + +.task-list { + max-height: 400px; + overflow-y: auto; + padding: 10px 0; +} + +.task-item { + border: 1px solid #ebeef5; + border-radius: 8px; + margin-bottom: 12px; + padding: 16px; + background: #fafafa; + transition: all 0.3s ease; + + &:hover { + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1); + transform: translateY(-2px); + } +} + +.task-header { + display: flex; + justify-content: space-between; + align-items: flex-start; + gap: 20px; +} + +.task-info { + flex: 1; + + .task-name { + margin: 0 0 8px 0; + font-size: 16px; + font-weight: 600; + color: #303133; + word-break: break-all; + } + + .task-dataset, .task-id, .task-time { + margin: 4px 0; + font-size: 12px; + color: #909399; + } + + .task-dataset { + color: #67c23a; + font-weight: 500; + } +} + +.task-progress { + flex: 0 0 200px; + + ::v-deep .el-progress__text { + font-weight: 600; + } +} + // 响应式设计 @media (max-width: 768px) { .main-container { @@ -1274,5 +1435,14 @@ const handleSearch = () => { flex-direction: column; gap: 4px; } + + .task-header { + flex-direction: column; + gap: 12px; + } + + .task-progress { + flex: 1; + } } \ No newline at end of file diff --git a/chat-server/src/main/java/com/bjtds/brichat/service/task/PdfConversionTaskService.java b/chat-server/src/main/java/com/bjtds/brichat/service/task/PdfConversionTaskService.java new file mode 100644 index 0000000..3dec015 --- /dev/null +++ b/chat-server/src/main/java/com/bjtds/brichat/service/task/PdfConversionTaskService.java @@ -0,0 +1,402 @@ +package com.bjtds.brichat.service.task; + +import com.bjtds.brichat.entity.dataset.DocumentUploadReq; +import com.bjtds.brichat.entity.dataset.RetrievalModel; +import com.bjtds.brichat.entity.dto.PdfTaskDto; +import com.bjtds.brichat.entity.dto.PdfTaskStatusResponse; +import com.bjtds.brichat.service.dify.DifyDatasetApiService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.data.redis.core.RedisTemplate; +import org.springframework.http.HttpEntity; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpMethod; +import org.springframework.http.ResponseEntity; +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Service; +import org.springframework.util.StringUtils; +import org.springframework.web.client.RestTemplate; +import org.springframework.web.multipart.MultipartFile; + +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.Map; + +/** + * PDF转换任务定时服务 + */ +@Service +public class PdfConversionTaskService { + + private static final Logger logger = LoggerFactory.getLogger(PdfConversionTaskService.class); + private static final String PDF_TASK_REDIS_KEY = "pdf:conversion:tasks"; + + @Autowired + private RestTemplate restTemplate; + + @Autowired + @Qualifier("redisTemplate") + private RedisTemplate redisTemplate; + + @Autowired + private DifyDatasetApiService difyDatasetApiService; + + @Value("${pdf.conversion.service.url}") + private String pdfConversionServiceUrl; + + /** + * 定时任务:每10秒检查一次PDF转换任务状态 + */ + @Scheduled(fixedRate = 10000) // 10秒执行一次 + public void checkPdfConversionTasks() { + try { + // 获取所有待处理的任务ID + List taskIds = redisTemplate.opsForList().range(PDF_TASK_REDIS_KEY + ":list", 0, -1); + + if (taskIds == null || taskIds.isEmpty()) { + logger.debug("没有待处理的PDF转换任务"); + return; + } + + logger.info("开始检查PDF转换任务状态,共{}个任务", taskIds.size()); + + for (Object taskIdObj : taskIds) { + String taskId = taskIdObj.toString(); + try { + checkSingleTask(taskId); + } catch (Exception e) { + logger.error("检查任务{}状态时发生错误: {}", taskId, e.getMessage(), e); + } + } + + } catch (Exception e) { + logger.error("定时检查PDF转换任务时发生错误: {}", e.getMessage(), e); + } + } + + /** + * 检查单个任务的状态 + */ + private void checkSingleTask(String taskId) { + try { + // 从Redis获取任务信息 + String hashKey = PDF_TASK_REDIS_KEY + ":" + taskId; + PdfTaskDto taskInfo = (PdfTaskDto) redisTemplate.opsForHash().get(hashKey, "taskInfo"); + + if (taskInfo == null) { + logger.warn("任务{}的信息在Redis中不存在,从队列中移除", taskId); + removeTaskFromQueue(taskId); + return; + } + + // 调用状态查询接口 + PdfTaskStatusResponse statusResponse = queryTaskStatus(taskId); + + if (statusResponse == null) { + logger.warn("无法获取任务{}的状态信息", taskId); + return; + } + + logger.info("任务{}状态: {}, 进度: {}", taskId, statusResponse.getStatus(), + statusResponse.getProgress() != null ? statusResponse.getProgress().toString() : "无进度信息"); + + // 根据状态处理任务 + switch (statusResponse.getStatus()) { + case "SUCCESS": + handleSuccessTask(taskInfo, statusResponse); + break; + case "FAILURE": + handleFailedTask(taskInfo, statusResponse); + break; + case "STARTED": + case "PENDING": + updateTaskProgress(taskInfo, statusResponse); + break; + default: + logger.warn("任务{}状态未知: {}", taskId, statusResponse.getStatus()); + break; + } + + } catch (Exception e) { + logger.error("检查任务{}时发生错误: {}", taskId, e.getMessage(), e); + } + } + + /** + * 查询任务状态 + */ + private PdfTaskStatusResponse queryTaskStatus(String taskId) { + String url = pdfConversionServiceUrl + "/v1/pdf2md/status/" + taskId; + + HttpHeaders headers = new HttpHeaders(); + HttpEntity requestEntity = new HttpEntity<>(headers); + + try { + ResponseEntity response = restTemplate.exchange( + url, + HttpMethod.GET, + requestEntity, + PdfTaskStatusResponse.class + ); + + return response.getBody(); + + } catch (Exception e) { + logger.error("查询任务{}状态失败: {}", taskId, e.getMessage()); + return null; + } + } + + /** + * 处理成功完成的任务 + */ + private void handleSuccessTask(PdfTaskDto taskInfo, PdfTaskStatusResponse statusResponse) { + String taskId = taskInfo.getTaskId(); + String result = statusResponse.getResult(); + + logger.info("=== PDF转换任务完成 ==="); + logger.info("任务ID: {}", taskId); + logger.info("文件名: {}", taskInfo.getName()); + logger.info("数据集ID: {}", taskInfo.getDatasetId()); + + if (statusResponse.getProgress() != null) { + logger.info("转换进度: {}", statusResponse.getProgress().toString()); + } + + if (StringUtils.hasText(result)) { + logger.info("=== 转换结果开始 ==="); + logger.info("MD文件路径: {}", result); + logger.info("=== 转换结果结束 ==="); + + // 上传MD文件到Dify知识库 + try { + uploadMdFileToDifyDataset(taskInfo, result); + logger.info("MD文件已成功上传到知识库"); + } catch (Exception e) { + logger.error("上传MD文件到知识库失败: {}", e.getMessage(), e); + } + } else { + logger.warn("任务{}转换完成但结果为空", taskId); + } + + // 从队列中移除任务 + removeTaskFromQueue(taskId); + logger.info("任务{}已完成并从队列中移除", taskId); + } + + /** + * 处理失败的任务 + */ + private void handleFailedTask(PdfTaskDto taskInfo, PdfTaskStatusResponse statusResponse) { + String taskId = taskInfo.getTaskId(); + + logger.error("=== PDF转换任务失败 ==="); + logger.error("任务ID: {}", taskId); + logger.error("文件名: {}", taskInfo.getName()); + logger.error("数据集ID: {}", taskInfo.getDatasetId()); + + if (statusResponse.getError() != null) { + logger.error("错误信息: {}", statusResponse.getError()); + } + + // 从队列中移除失败的任务 + removeTaskFromQueue(taskId); + logger.info("失败任务{}已从队列中移除", taskId); + } + + /** + * 更新任务进度 + */ + private void updateTaskProgress(PdfTaskDto taskInfo, PdfTaskStatusResponse statusResponse) { + String taskId = taskInfo.getTaskId(); + + // 更新任务进度 + if (statusResponse.getProgress() != null) { + Double newPercent = statusResponse.getProgress().getPercent(); + taskInfo.setPercent(newPercent); + + // 更新Redis中的任务信息 + String hashKey = PDF_TASK_REDIS_KEY + ":" + taskId; + redisTemplate.opsForHash().put(hashKey, "taskInfo", taskInfo); + + logger.debug("任务{}进度更新: {}%", taskId, String.format("%.1f", newPercent)); + } + } + + /** + * 从队列中移除任务 + */ + private void removeTaskFromQueue(String taskId) { + try { + // 从任务列表中移除 + redisTemplate.opsForList().remove(PDF_TASK_REDIS_KEY + ":list", 1, taskId); + + // 删除任务详细信息 + String hashKey = PDF_TASK_REDIS_KEY + ":" + taskId; + redisTemplate.delete(hashKey); + + logger.debug("任务{}已从Redis队列中移除", taskId); + + } catch (Exception e) { + logger.error("移除任务{}时发生错误: {}", taskId, e.getMessage(), e); + } + } + + /** + * 上传MD文件到Dify知识库 + */ + private void uploadMdFileToDifyDataset(PdfTaskDto taskInfo, String mdFilePath) throws Exception { + logger.info("开始上传MD文件到知识库: filePath={}, datasetId={}", mdFilePath, taskInfo.getDatasetId()); + + // 1. 验证文件是否存在 + Path path = Paths.get(mdFilePath); + if (!Files.exists(path)) { + throw new RuntimeException("MD文件不存在: " + mdFilePath); + } + + // 2. 创建MultipartFile + MultipartFile multipartFile = createMultipartFileFromPath(path); + + + // 3. 调用上传接口 + ResponseEntity response = difyDatasetApiService.createDocumentByFile(taskInfo.getUploadReq(), multipartFile); + + if (response.getStatusCode().is2xxSuccessful()) { + logger.info("MD文件上传成功: taskId={}, fileName={}, response={}", + taskInfo.getTaskId(), taskInfo.getName(), response.getBody()); + } else { + throw new RuntimeException("MD文件上传失败,HTTP状态码: " + response.getStatusCode()); + } + } + + /** + * 从文件路径创建MultipartFile (JDK 1.8兼容版本) + */ + private MultipartFile createMultipartFileFromPath(Path filePath) throws IOException { + File file = filePath.toFile(); + String fileName = file.getName(); + String originalFileName = fileName.endsWith(".md") ? fileName : fileName + ".md"; + + // JDK 1.8兼容的文件读取方式 + byte[] content = readFileToByteArray(file); + + return new SimpleMultipartFile( + "file", + originalFileName, + "text/markdown", + content + ); + } + + /** + * JDK 1.8兼容的文件读取方法 + */ + private byte[] readFileToByteArray(File file) throws IOException { + try (FileInputStream fis = new FileInputStream(file); + ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = fis.read(buffer)) != -1) { + baos.write(buffer, 0, bytesRead); + } + return baos.toByteArray(); + } + } + + /** + * 简单的MultipartFile实现类 (JDK 1.8兼容) + */ + private static class SimpleMultipartFile implements MultipartFile { + private final String name; + private final String originalFilename; + private final String contentType; + private final byte[] content; + + public SimpleMultipartFile(String name, String originalFilename, String contentType, byte[] content) { + this.name = name; + this.originalFilename = originalFilename; + this.contentType = contentType; + this.content = content; + } + + @Override + public String getName() { + return name; + } + + @Override + public String getOriginalFilename() { + return originalFilename; + } + + @Override + public String getContentType() { + return contentType; + } + + @Override + public boolean isEmpty() { + return content.length == 0; + } + + @Override + public long getSize() { + return content.length; + } + + @Override + public byte[] getBytes() throws IOException { + return content; + } + + @Override + public InputStream getInputStream() throws IOException { + return new ByteArrayInputStream(content); + } + + @Override + public void transferTo(File dest) throws IOException, IllegalStateException { + try (FileOutputStream fos = new FileOutputStream(dest)) { + fos.write(content); + } + } + } + + /** + * 创建文档上传请求 + */ + private DocumentUploadReq createDocumentUploadRequest(String datasetId) { + DocumentUploadReq request = new DocumentUploadReq(); + request.setDatasetId(datasetId); + request.setDeepAnalysis(false); // MD文件不需要深度解析 + request.setIndexingTechnique("high_quality"); + request.setDocLanguage("chinese"); + + // 设置处理规则 + DocumentUploadReq.ProcessRule processRule = new DocumentUploadReq.ProcessRule(); + processRule.setMode("automatic"); + request.setProcessRule(processRule); + + // 设置检索模式(默认混合检索) + RetrievalModel retrievalModel = new RetrievalModel(); + retrievalModel.setSearchMethod("hybrid_search"); + retrievalModel.setRerankingEnable(true); + RetrievalModel.RerankingModel rerankingModel = new RetrievalModel.RerankingModel(); + rerankingModel.setRerankingModelName("bge-reanker-v2-m3"); + rerankingModel.setRerankingProviderName("langgenius/huggingface_tei/huggingface_tei"); + retrievalModel.setTopK(3); + retrievalModel.setRerankingModel(rerankingModel); + retrievalModel.setScoreThresholdEnabled(false); + retrievalModel.setScoreThreshold(0.5f); + request.setRetrievalModel(retrievalModel); + + return request; + } +} \ No newline at end of file