新增深度解析接口
This commit is contained in:
parent
9ccaa56da3
commit
6ef06e12a9
|
@ -2,17 +2,17 @@
|
|||
# 此文件修改后需要重启项目
|
||||
NODE_ENV=development
|
||||
#VUE_APP_BASE_URL='/vab-mock-server'
|
||||
VUE_APP_BASE_URL='http://localhost:10001'
|
||||
VUE_APP_API_BASE_URL='http://localhost:8080'
|
||||
# VUE_APP_BASE_URL='http://localhost:10001'
|
||||
# VUE_APP_API_BASE_URL='http://localhost:8080'
|
||||
#北京服务器配置
|
||||
# VUE_APP_BASE_URL='http://192.168.1.211:80/brichat'
|
||||
# VUE_APP_API_BASE_URL='http://192.168.1.211:80/brichat'
|
||||
# #武汉公司服务器
|
||||
# VUE_APP_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
# VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
VUE_APP_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
#总部服务器配置
|
||||
# VUE_APP_BASE_URL='http://192.168.0.33:80/brichat'
|
||||
# VUE_APP_API_BASE_URL='http://192.168.0.33:80/brichat'
|
||||
|
||||
VUE_APP_GITHUB_USER_NAME=test
|
||||
VUE_APP_SECRET_KEY=preview
|
||||
# VUE_APP_GITHUB_USER_NAME=test
|
||||
# VUE_APP_SECRET_KEY=preview
|
||||
|
|
|
@ -2,18 +2,18 @@
|
|||
# 此文件修改后需要重启项目
|
||||
NODE_ENV=production
|
||||
# VUE_APP_BASE_URL='/vab-mock-server'
|
||||
VUE_APP_BASE_URL='http://localhost:10001'
|
||||
VUE_APP_API_BASE_URL='http://localhost:10001'
|
||||
# VUE_APP_BASE_URL='http://localhost:10001'
|
||||
# VUE_APP_API_BASE_URL='http://localhost:10001'
|
||||
#北京服务器
|
||||
# VUE_APP_BASE_URL='http://192.168.1.211:80/brichat'
|
||||
# VUE_APP_API_BASE_URL='http://192.168.1.211:80/brichat'
|
||||
#武汉公司服务器
|
||||
# VUE_APP_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
# VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
VUE_APP_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat'
|
||||
# 总部服务器
|
||||
# VUE_APP_BASE_URL='http://192.168.0.33:80/brichat'
|
||||
# VUE_APP_API_BASE_URL='http://192.168.0.33:80/brichat'
|
||||
|
||||
|
||||
VUE_APP_GITHUB_USER_NAME=test
|
||||
VUE_APP_SECRET_KEY=preview
|
||||
# VUE_APP_GITHUB_USER_NAME=test
|
||||
# VUE_APP_SECRET_KEY=preview
|
|
@ -138,4 +138,14 @@ export const deleteDataset = (id: string) => {
|
|||
datasetId
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Fetch the list of deep-analysis tasks.
 *
 * Sends a GET to the `deepAnalysisList` document endpoint through the shared
 * `request` helper and returns whatever that helper returns.
 */
export const getDeepAnalysisList = () =>
  request({
    method: 'get',
    url: '/brichat-service/datasetManage/document/deepAnalysisList'
  })
|
|
@ -295,6 +295,7 @@ export default {
|
|||
download: 'Download',
|
||||
rename: 'Rename',
|
||||
delete: 'Delete',
|
||||
deepAnalysis: 'Deep Analysis',
|
||||
},
|
||||
search: {
|
||||
placeholder: 'Enter keywords to search',
|
||||
|
@ -314,6 +315,8 @@ export default {
|
|||
segmentation: 'Segmentation Rules',
|
||||
separatorPlaceholder: 'Separator (default: ###)',
|
||||
maxTokens: 'Max Tokens',
|
||||
deepAnalysis: 'Deep Analysis',
|
||||
deepAnalysisOption: 'Enable deep analysis to improve document understanding accuracy',
|
||||
selectFile: 'Select the file',
|
||||
fileTip:
|
||||
'Multiple selection supported, max 100MB per file. Accepted formats: TXT, MD, MARKDOWN, MDX, PDF, HTML, HTM, XLSX, XLS, DOCX, CSV, VTT, PROPERTIES',
|
||||
|
@ -331,6 +334,14 @@ export default {
|
|||
confirm: 'Confirm',
|
||||
deleteconfirm: 'Delete Confirm',
|
||||
},
|
||||
deepAnalysisDialog: {
|
||||
title: 'Deep Analysis Task List',
|
||||
noTasks: 'No deep analysis tasks are currently in progress',
|
||||
datasetName: 'Dataset',
|
||||
taskId: 'Task ID',
|
||||
createTime: 'Create Time',
|
||||
close: 'Close',
|
||||
},
|
||||
messages: {
|
||||
FilenamecantEmpty: 'Filename cannot be empty',
|
||||
uploadSuccess: 'Upload successfully',
|
||||
|
@ -359,6 +370,7 @@ export default {
|
|||
fetchFailed: 'Failed to get document list: ',
|
||||
previewFailed: 'Preview failed: ',
|
||||
downloadFailed: 'Download failed: ',
|
||||
fetchDeepAnalysisFailed: 'Failed to get deep analysis task list',
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -515,70 +527,35 @@ export default {
|
|||
Tip: 'Tip',
|
||||
selectDeleteData: 'Please select the data to be deleted',
|
||||
confirmDeleteApiKeys: 'This operation will permanently delete selected ',
|
||||
confirmDeleteApiKeysEnd: ' API key, are you sure?',
|
||||
confirmDeleteApiKeysEndPlural: ' API keys, are you sure?',
|
||||
confirmDeleteApiKeysEnd:' API key, are you sure?',
|
||||
confirmDeleteApiKeysEndPlural:' API keys, are you sure?',
|
||||
confirmRefreshCache: 'This operation will refresh Redis cache, reloading all API keys, are you sure?',
|
||||
updateSuccess: 'Update Successfully',
|
||||
updateFail: 'Update Failed',
|
||||
addSuccess: 'Add Successfully',
|
||||
addFail: 'Add Failed',
|
||||
addFail:'Add Failed'
|
||||
},
|
||||
prologue: {
|
||||
title: 'Opening Statement Management',
|
||||
query: 'Query',
|
||||
save: 'Save',
|
||||
prologue:{
|
||||
chatType: 'Chat Type',
|
||||
chatTypes: {
|
||||
'1': 'Fault Diagnosis',
|
||||
'2': 'Intelligent Q&A',
|
||||
'3': 'Chart Report',
|
||||
'4': 'Emergency Assistant Q&A',
|
||||
'5': 'Diagnostic Code Lookup',
|
||||
},
|
||||
openingContent: 'Opening Statement Content',
|
||||
addRecommend: 'Add Recommended Question',
|
||||
recommendation: 'Recommended Question',
|
||||
action: 'Action',
|
||||
selectChatType: 'Select Chat Type',
|
||||
prologueContent: 'Prologue Content',
|
||||
addRecommend: 'Add Recommendation',
|
||||
recommendQuestion: 'Recommended Question',
|
||||
actions: 'Actions',
|
||||
edit: 'Edit',
|
||||
delete: 'Delete',
|
||||
dialog: {
|
||||
addTitle: 'Add Recommended Question',
|
||||
editTitle: 'Edit Recommended Question',
|
||||
questionContent: 'Recommended Question',
|
||||
cancel: 'Cancel',
|
||||
confirm: 'Confirm',
|
||||
save: 'Save',
|
||||
},
|
||||
message: {
|
||||
loadSuccess: 'Loaded successfully',
|
||||
loadFailed: 'Load failed, please try again later',
|
||||
loadFailedMess: 'Failed to load recommended questions',
|
||||
notFound: 'Opening statement not found',
|
||||
saveSuccess: 'Saved successfully',
|
||||
saveFailed: 'Save failed, please try again later',
|
||||
addSuccess: 'Recommended question added successfully',
|
||||
addFailed: 'Failed to add recommended question',
|
||||
editSuccess: 'Recommended question edited successfully',
|
||||
editFailed: 'Failed to edit recommended question, please try again later',
|
||||
deleteSuccess: 'Deleted successfully',
|
||||
deleteFailed: 'Deletion failed',
|
||||
},
|
||||
confirm: {
|
||||
save: 'Are you sure you want to save the changes?',
|
||||
tips: 'Tips',
|
||||
delete: 'Are you sure you want to delete this recommended question?',
|
||||
deleteConfirm: 'Delete Confirmation',
|
||||
},
|
||||
placeholder: {
|
||||
selectType: 'Select chat type',
|
||||
},
|
||||
},
|
||||
vabtabs: {
|
||||
refresh: 'Refresh',
|
||||
closeOthers: 'Close Others',
|
||||
closeLeft: 'Close Left',
|
||||
closeRight: 'Close Right',
|
||||
closeAll: 'Close All',
|
||||
},
|
||||
confirmDelete: 'Are you sure you want to delete this recommendation?',
|
||||
confirmSave: 'Are you sure you want to save the changes?',
|
||||
deleteSuccess: 'Deleted successfully',
|
||||
deleteFailure: 'Failed to delete, please try again later',
|
||||
saveSuccess: 'Saved successfully',
|
||||
saveFailure: 'Failed to save, please try again later',
|
||||
loadSuccess: 'Loaded successfully',
|
||||
loadFailure: 'Failed to load, please try again later',
|
||||
noContent: 'No content found',
|
||||
loading: 'Loading...',
|
||||
updateRecommendation: 'Update Recommendation',
|
||||
addRecommendation: 'Add Recommendation',
|
||||
}
|
||||
},
|
||||
}
|
||||
|
|
|
@ -303,6 +303,7 @@ export default {
|
|||
download: '下载',
|
||||
rename: '重命名',
|
||||
delete: '删除',
|
||||
deepAnalysis: '深度解析',
|
||||
},
|
||||
search: {
|
||||
placeholder: '请输入关键词查询',
|
||||
|
@ -322,6 +323,8 @@ export default {
|
|||
segmentation: '分段规则',
|
||||
separatorPlaceholder: '分隔符(默认###)',
|
||||
maxTokens: '最大Token数',
|
||||
deepAnalysis: '深度解析',
|
||||
deepAnalysisOption: '启用深度解析,提高文档理解精度',
|
||||
selectFile: '选择文件',
|
||||
fileTip: '支持多选,单个文件不超过100MB,可接受格式:TXT、MD、MARKDOWN、MDX、PDF、HTML、HTM、XLSX、XLS、DOCX、CSV、VTT、PROPERTIES',
|
||||
cancel: '取消',
|
||||
|
@ -338,6 +341,14 @@ export default {
|
|||
confirm: '确定',
|
||||
deleteconfirm: '删除确认',
|
||||
},
|
||||
deepAnalysisDialog: {
|
||||
title: '深度解析任务列表',
|
||||
noTasks: '当前没有正在进行的深度解析任务',
|
||||
datasetName: '知识库',
|
||||
taskId: '任务ID',
|
||||
createTime: '创建时间',
|
||||
close: '关闭',
|
||||
},
|
||||
messages: {
|
||||
FilenamecantEmpty: '文件名不能为空',
|
||||
uploadSuccess: '上传成功',
|
||||
|
@ -367,6 +378,7 @@ export default {
|
|||
fetchFailed: '获取文档列表失败: ',
|
||||
previewFailed: '预览失败: ',
|
||||
downloadFailed: '下载失败: ',
|
||||
fetchDeepAnalysisFailed: '获取深度解析任务列表失败',
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
{{t('vabI18n.knowledge.document.buttons.batchDelete', { count: selectedRows.length })}}
|
||||
</el-button>
|
||||
<el-button :icon="Refresh" @click="handleRefresh">{{t('vabI18n.knowledge.document.buttons.refresh')}}</el-button>
|
||||
<el-button type="info" @click="handleDeepAnalysis">{{t('vabI18n.knowledge.document.buttons.deepAnalysis')}}</el-button>
|
||||
<el-button type="primary" :icon="Upload" @click="triggerFileInput">{{t('vabI18n.knowledge.document.buttons.upload')}}</el-button>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -186,6 +187,12 @@
|
|||
</div>
|
||||
</el-form-item>
|
||||
|
||||
<el-form-item :label="t('vabI18n.knowledge.document.uploadDialog.deepAnalysis')" label-width="auto">
|
||||
<el-checkbox v-model="uploadForm.deepAnalysis">
|
||||
{{t('vabI18n.knowledge.document.uploadDialog.deepAnalysisOption')}}
|
||||
</el-checkbox>
|
||||
</el-form-item>
|
||||
|
||||
<el-upload
|
||||
ref="uploadRef"
|
||||
multiple
|
||||
|
@ -267,6 +274,38 @@
|
|||
<el-button type="primary" @click="handleRenameConfirm">{{t('vabI18n.knowledge.document.renameDialog.confirm')}}</el-button>
|
||||
</template>
|
||||
</el-dialog>
|
||||
|
||||
<!-- 深度解析任务列表对话框 -->
|
||||
<el-dialog v-model="deepAnalysisDialogVisible" :title="t('vabI18n.knowledge.document.deepAnalysisDialog.title')" width="800px" class="deep-analysis-dialog">
|
||||
<div v-loading="deepAnalysisLoading" :element-loading-text="t('vabI18n.knowledge.document.messages.loading')">
|
||||
<div v-if="deepAnalysisList.length === 0 && !deepAnalysisLoading" class="empty-state">
|
||||
<el-empty :description="t('vabI18n.knowledge.document.deepAnalysisDialog.noTasks')" />
|
||||
</div>
|
||||
<div v-else class="task-list">
|
||||
<div v-for="task in deepAnalysisList" :key="task.taskId" class="task-item">
|
||||
<div class="task-header">
|
||||
<div class="task-info">
|
||||
<h4 class="task-name">{{ task.name }}</h4>
|
||||
<p class="task-dataset">{{t('vabI18n.knowledge.document.deepAnalysisDialog.datasetName')}}: {{ task.datasetName }}</p>
|
||||
<p class="task-id">{{t('vabI18n.knowledge.document.deepAnalysisDialog.taskId')}}: {{ task.taskId }}</p>
|
||||
<p class="task-time">{{t('vabI18n.knowledge.document.deepAnalysisDialog.createTime')}}: {{ formatTimestampToLocaleString(task.createTime) }}</p>
|
||||
</div>
|
||||
<div class="task-progress">
|
||||
<el-progress
|
||||
:percentage="Math.round(task.percent * 100)"
|
||||
:status="task.percent >= 1 ? 'success' : 'warning'"
|
||||
:stroke-width="8"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<template #footer>
|
||||
<el-button @click="fetchDeepAnalysisList" :loading="deepAnalysisLoading">{{t('vabI18n.knowledge.document.buttons.refresh')}}</el-button>
|
||||
<el-button type="primary" @click="deepAnalysisDialogVisible = false">{{t('vabI18n.knowledge.document.deepAnalysisDialog.close')}}</el-button>
|
||||
</template>
|
||||
</el-dialog>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
|
@ -274,7 +313,7 @@
|
|||
import { useRoute } from 'vue-router'
|
||||
import { ref, reactive } from 'vue'
|
||||
import VueOfficePdf from '@vue-office/pdf'
|
||||
import { getDatasetDocPage, uploadDocument, deleteDocument, downloadDocument, previewDocumentUrl, renameDocument} from '@/api/dataset'
|
||||
import { getDatasetDocPage, uploadDocument, deleteDocument, downloadDocument, previewDocumentUrl, renameDocument, getDeepAnalysisList } from '@/api/dataset'
|
||||
//引入VueOfficeDocx组件
|
||||
import VueOfficeDocx from '@vue-office/docx'
|
||||
//引入相关样式
|
||||
|
@ -328,7 +367,8 @@ const uploadForm = reactive({
|
|||
indexingTechnique: 'high_quality',
|
||||
preProcessingRules: ['remove_extra_spaces', 'remove_urls_emails'],
|
||||
segmentSeparator: '###',
|
||||
segmentMaxTokens: 500
|
||||
segmentMaxTokens: 500,
|
||||
deepAnalysis: false
|
||||
})
|
||||
|
||||
// 重命名相关
|
||||
|
@ -338,6 +378,21 @@ const renameForm = reactive({
|
|||
newName: ''
|
||||
})
|
||||
|
||||
/** One deep-analysis task as rendered by the task-list dialog. */
interface PdfTask {
  /** Heading shown for the task. */
  name: string
  /** Used as the `v-for` key in the dialog and displayed as the task id. */
  taskId: string
  /**
   * Progress value. The dialog multiplies it by 100 for the progress bar and
   * treats `>= 1` as finished, so it is presumably a 0..1 fraction —
   * NOTE(review): confirm against the backend.
   */
  percent: number
  /** Name of the dataset the task belongs to. */
  datasetName: string
  /**
   * Creation time. `formatTimestampToLocaleString` multiplies it by 1000, so
   * it is presumably epoch seconds — NOTE(review): confirm against the backend.
   */
  createTime: number
}

// Deep-analysis dialog state
const deepAnalysisList = ref<PdfTask[]>([])
const deepAnalysisLoading = ref(false)
const deepAnalysisDialogVisible = ref(false)
|
||||
|
||||
const getFileTypeIcon = (fileType: string) => {
|
||||
// 使用动态导入获取图标路径
|
||||
// const getIconUrl = (iconName: string) => {
|
||||
|
@ -625,6 +680,7 @@ const handleUpload = async () => {
|
|||
datasetId: datasetId.value,
|
||||
indexingTechnique: uploadForm.indexingTechnique,
|
||||
processRule: processRule,
|
||||
deepAnalysis: uploadForm.deepAnalysis
|
||||
})], {
|
||||
type: 'application/json'
|
||||
}))
|
||||
|
@ -806,6 +862,34 @@ const handleRefresh = () => {
|
|||
fetchDocuments()
|
||||
}
|
||||
|
||||
/**
 * Click handler for the "deep analysis" toolbar button.
 *
 * The dialog is made visible first, so it is already open while the task
 * list is being fetched.
 */
const handleDeepAnalysis = async (): Promise<void> => {
  deepAnalysisDialogVisible.value = true
  await fetchDeepAnalysisList()
}
|
||||
|
||||
/**
 * Load the deep-analysis task list into `deepAnalysisList`.
 *
 * `deepAnalysisLoading` is set for the duration of the request and always
 * cleared afterwards. On failure the error is logged, an error notification
 * is shown, and the previously loaded list is left untouched.
 */
const fetchDeepAnalysisList = async (): Promise<void> => {
  deepAnalysisLoading.value = true
  try {
    const { data } = await getDeepAnalysisList()
    // The template reads `.length` and iterates this value, so only accept a
    // real array; any other payload (null, object, string) becomes an empty list.
    deepAnalysisList.value = Array.isArray(data) ? data : []
  } catch (error) {
    console.error('获取深度解析任务列表失败:', error)
    ElNotification({
      title: t('vabI18n.knowledge.document.errors.fetchDeepAnalysisFailed'),
      message: error instanceof Error ? error.message : t('vabI18n.knowledge.document.messages.NoKnowError'),
      type: 'error'
    })
  } finally {
    deepAnalysisLoading.value = false
  }
}
|
||||
|
||||
/**
 * Format a task timestamp for display in the deep-analysis dialog.
 *
 * The value is multiplied by 1000 before being passed to `Date`, i.e. it is
 * treated as seconds. NOTE(review): confirm the backend sends epoch seconds.
 *
 * @param timestamp - Timestamp in seconds.
 * @returns The locale-formatted date/time, or `'-'` when the timestamp is
 *          missing or does not produce a valid date.
 */
const formatTimestampToLocaleString = (timestamp: number): string => {
  // API data is untyped at runtime: null/undefined would otherwise render as
  // the 1970 epoch or "Invalid Date" in the task list.
  if (timestamp == null) {
    return '-'
  }
  const date = new Date(timestamp * 1000)
  return Number.isNaN(date.getTime()) ? '-' : date.toLocaleString()
}
|
||||
|
||||
// 工具函数
|
||||
const formatTimestamp = (timestamp: number): string => {
|
||||
const date = new Date(timestamp * 1000)
|
||||
|
@ -1249,6 +1333,83 @@ const handleSearch = () => {
|
|||
to { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
// Deep-analysis dialog styles.
// `:deep()` replaces the `::v-deep` combinator, which is deprecated in Vue 3.
.deep-analysis-dialog {
  :deep(.el-dialog__header) {
    background: linear-gradient(135deg, #409eff 0%, #67c23a 100%);
    color: white;
    border-radius: 8px 8px 0 0;
  }

  :deep(.el-dialog__title) {
    color: white;
    font-weight: 600;
  }
}

// Placeholder shown when there are no tasks
.empty-state {
  padding: 40px 20px;
  text-align: center;
}

// Scrollable container for the task cards
.task-list {
  max-height: 400px;
  overflow-y: auto;
  padding: 10px 0;
}

// One task card; lifts slightly on hover
.task-item {
  border: 1px solid #ebeef5;
  border-radius: 8px;
  margin-bottom: 12px;
  padding: 16px;
  background: #fafafa;
  transition: all 0.3s ease;

  &:hover {
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
    transform: translateY(-2px);
  }
}

// Row layout: task details on the left, progress bar on the right
.task-header {
  display: flex;
  justify-content: space-between;
  align-items: flex-start;
  gap: 20px;
}

.task-info {
  flex: 1;

  .task-name {
    margin: 0 0 8px 0;
    font-size: 16px;
    font-weight: 600;
    color: #303133;
    word-break: break-all;
  }

  .task-dataset, .task-id, .task-time {
    margin: 4px 0;
    font-size: 12px;
    color: #909399;
  }

  // Declared after the shared rule above so its colour wins
  .task-dataset {
    color: #67c23a;
    font-weight: 500;
  }
}

.task-progress {
  flex: 0 0 200px;

  :deep(.el-progress__text) {
    font-weight: 600;
  }
}
|
||||
|
||||
// 响应式设计
|
||||
@media (max-width: 768px) {
|
||||
.main-container {
|
||||
|
@ -1274,5 +1435,14 @@ const handleSearch = () => {
|
|||
flex-direction: column;
|
||||
gap: 4px;
|
||||
}
|
||||
|
||||
.task-header {
|
||||
flex-direction: column;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.task-progress {
|
||||
flex: 1;
|
||||
}
|
||||
}
|
||||
</style>
|
|
@ -0,0 +1,402 @@
|
|||
package com.bjtds.brichat.service.task;
|
||||
|
||||
import com.bjtds.brichat.entity.dataset.DocumentUploadReq;
|
||||
import com.bjtds.brichat.entity.dataset.RetrievalModel;
|
||||
import com.bjtds.brichat.entity.dto.PdfTaskDto;
|
||||
import com.bjtds.brichat.entity.dto.PdfTaskStatusResponse;
|
||||
import com.bjtds.brichat.service.dify.DifyDatasetApiService;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.data.redis.core.RedisTemplate;
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpMethod;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.StringUtils;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* PDF转换任务定时服务
|
||||
*/
|
||||
@Service
|
||||
public class PdfConversionTaskService {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(PdfConversionTaskService.class);
|
||||
private static final String PDF_TASK_REDIS_KEY = "pdf:conversion:tasks";
|
||||
|
||||
@Autowired
|
||||
private RestTemplate restTemplate;
|
||||
|
||||
@Autowired
|
||||
@Qualifier("redisTemplate")
|
||||
private RedisTemplate<String, Object> redisTemplate;
|
||||
|
||||
@Autowired
|
||||
private DifyDatasetApiService difyDatasetApiService;
|
||||
|
||||
@Value("${pdf.conversion.service.url}")
|
||||
private String pdfConversionServiceUrl;
|
||||
|
||||
/**
|
||||
* 定时任务:每10秒检查一次PDF转换任务状态
|
||||
*/
|
||||
@Scheduled(fixedRate = 10000) // 10秒执行一次
|
||||
public void checkPdfConversionTasks() {
|
||||
try {
|
||||
// 获取所有待处理的任务ID
|
||||
List<Object> taskIds = redisTemplate.opsForList().range(PDF_TASK_REDIS_KEY + ":list", 0, -1);
|
||||
|
||||
if (taskIds == null || taskIds.isEmpty()) {
|
||||
logger.debug("没有待处理的PDF转换任务");
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info("开始检查PDF转换任务状态,共{}个任务", taskIds.size());
|
||||
|
||||
for (Object taskIdObj : taskIds) {
|
||||
String taskId = taskIdObj.toString();
|
||||
try {
|
||||
checkSingleTask(taskId);
|
||||
} catch (Exception e) {
|
||||
logger.error("检查任务{}状态时发生错误: {}", taskId, e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("定时检查PDF转换任务时发生错误: {}", e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查单个任务的状态
|
||||
*/
|
||||
private void checkSingleTask(String taskId) {
|
||||
try {
|
||||
// 从Redis获取任务信息
|
||||
String hashKey = PDF_TASK_REDIS_KEY + ":" + taskId;
|
||||
PdfTaskDto taskInfo = (PdfTaskDto) redisTemplate.opsForHash().get(hashKey, "taskInfo");
|
||||
|
||||
if (taskInfo == null) {
|
||||
logger.warn("任务{}的信息在Redis中不存在,从队列中移除", taskId);
|
||||
removeTaskFromQueue(taskId);
|
||||
return;
|
||||
}
|
||||
|
||||
// 调用状态查询接口
|
||||
PdfTaskStatusResponse statusResponse = queryTaskStatus(taskId);
|
||||
|
||||
if (statusResponse == null) {
|
||||
logger.warn("无法获取任务{}的状态信息", taskId);
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info("任务{}状态: {}, 进度: {}", taskId, statusResponse.getStatus(),
|
||||
statusResponse.getProgress() != null ? statusResponse.getProgress().toString() : "无进度信息");
|
||||
|
||||
// 根据状态处理任务
|
||||
switch (statusResponse.getStatus()) {
|
||||
case "SUCCESS":
|
||||
handleSuccessTask(taskInfo, statusResponse);
|
||||
break;
|
||||
case "FAILURE":
|
||||
handleFailedTask(taskInfo, statusResponse);
|
||||
break;
|
||||
case "STARTED":
|
||||
case "PENDING":
|
||||
updateTaskProgress(taskInfo, statusResponse);
|
||||
break;
|
||||
default:
|
||||
logger.warn("任务{}状态未知: {}", taskId, statusResponse.getStatus());
|
||||
break;
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("检查任务{}时发生错误: {}", taskId, e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 查询任务状态
|
||||
*/
|
||||
private PdfTaskStatusResponse queryTaskStatus(String taskId) {
|
||||
String url = pdfConversionServiceUrl + "/v1/pdf2md/status/" + taskId;
|
||||
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
HttpEntity<?> requestEntity = new HttpEntity<>(headers);
|
||||
|
||||
try {
|
||||
ResponseEntity<PdfTaskStatusResponse> response = restTemplate.exchange(
|
||||
url,
|
||||
HttpMethod.GET,
|
||||
requestEntity,
|
||||
PdfTaskStatusResponse.class
|
||||
);
|
||||
|
||||
return response.getBody();
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("查询任务{}状态失败: {}", taskId, e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理成功完成的任务
|
||||
*/
|
||||
private void handleSuccessTask(PdfTaskDto taskInfo, PdfTaskStatusResponse statusResponse) {
|
||||
String taskId = taskInfo.getTaskId();
|
||||
String result = statusResponse.getResult();
|
||||
|
||||
logger.info("=== PDF转换任务完成 ===");
|
||||
logger.info("任务ID: {}", taskId);
|
||||
logger.info("文件名: {}", taskInfo.getName());
|
||||
logger.info("数据集ID: {}", taskInfo.getDatasetId());
|
||||
|
||||
if (statusResponse.getProgress() != null) {
|
||||
logger.info("转换进度: {}", statusResponse.getProgress().toString());
|
||||
}
|
||||
|
||||
if (StringUtils.hasText(result)) {
|
||||
logger.info("=== 转换结果开始 ===");
|
||||
logger.info("MD文件路径: {}", result);
|
||||
logger.info("=== 转换结果结束 ===");
|
||||
|
||||
// 上传MD文件到Dify知识库
|
||||
try {
|
||||
uploadMdFileToDifyDataset(taskInfo, result);
|
||||
logger.info("MD文件已成功上传到知识库");
|
||||
} catch (Exception e) {
|
||||
logger.error("上传MD文件到知识库失败: {}", e.getMessage(), e);
|
||||
}
|
||||
} else {
|
||||
logger.warn("任务{}转换完成但结果为空", taskId);
|
||||
}
|
||||
|
||||
// 从队列中移除任务
|
||||
removeTaskFromQueue(taskId);
|
||||
logger.info("任务{}已完成并从队列中移除", taskId);
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理失败的任务
|
||||
*/
|
||||
private void handleFailedTask(PdfTaskDto taskInfo, PdfTaskStatusResponse statusResponse) {
|
||||
String taskId = taskInfo.getTaskId();
|
||||
|
||||
logger.error("=== PDF转换任务失败 ===");
|
||||
logger.error("任务ID: {}", taskId);
|
||||
logger.error("文件名: {}", taskInfo.getName());
|
||||
logger.error("数据集ID: {}", taskInfo.getDatasetId());
|
||||
|
||||
if (statusResponse.getError() != null) {
|
||||
logger.error("错误信息: {}", statusResponse.getError());
|
||||
}
|
||||
|
||||
// 从队列中移除失败的任务
|
||||
removeTaskFromQueue(taskId);
|
||||
logger.info("失败任务{}已从队列中移除", taskId);
|
||||
}
|
||||
|
||||
/**
|
||||
* 更新任务进度
|
||||
*/
|
||||
private void updateTaskProgress(PdfTaskDto taskInfo, PdfTaskStatusResponse statusResponse) {
|
||||
String taskId = taskInfo.getTaskId();
|
||||
|
||||
// 更新任务进度
|
||||
if (statusResponse.getProgress() != null) {
|
||||
Double newPercent = statusResponse.getProgress().getPercent();
|
||||
taskInfo.setPercent(newPercent);
|
||||
|
||||
// 更新Redis中的任务信息
|
||||
String hashKey = PDF_TASK_REDIS_KEY + ":" + taskId;
|
||||
redisTemplate.opsForHash().put(hashKey, "taskInfo", taskInfo);
|
||||
|
||||
logger.debug("任务{}进度更新: {}%", taskId, String.format("%.1f", newPercent));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从队列中移除任务
|
||||
*/
|
||||
private void removeTaskFromQueue(String taskId) {
|
||||
try {
|
||||
// 从任务列表中移除
|
||||
redisTemplate.opsForList().remove(PDF_TASK_REDIS_KEY + ":list", 1, taskId);
|
||||
|
||||
// 删除任务详细信息
|
||||
String hashKey = PDF_TASK_REDIS_KEY + ":" + taskId;
|
||||
redisTemplate.delete(hashKey);
|
||||
|
||||
logger.debug("任务{}已从Redis队列中移除", taskId);
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("移除任务{}时发生错误: {}", taskId, e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 上传MD文件到Dify知识库
|
||||
*/
|
||||
private void uploadMdFileToDifyDataset(PdfTaskDto taskInfo, String mdFilePath) throws Exception {
|
||||
logger.info("开始上传MD文件到知识库: filePath={}, datasetId={}", mdFilePath, taskInfo.getDatasetId());
|
||||
|
||||
// 1. 验证文件是否存在
|
||||
Path path = Paths.get(mdFilePath);
|
||||
if (!Files.exists(path)) {
|
||||
throw new RuntimeException("MD文件不存在: " + mdFilePath);
|
||||
}
|
||||
|
||||
// 2. 创建MultipartFile
|
||||
MultipartFile multipartFile = createMultipartFileFromPath(path);
|
||||
|
||||
|
||||
// 3. 调用上传接口
|
||||
ResponseEntity<Map> response = difyDatasetApiService.createDocumentByFile(taskInfo.getUploadReq(), multipartFile);
|
||||
|
||||
if (response.getStatusCode().is2xxSuccessful()) {
|
||||
logger.info("MD文件上传成功: taskId={}, fileName={}, response={}",
|
||||
taskInfo.getTaskId(), taskInfo.getName(), response.getBody());
|
||||
} else {
|
||||
throw new RuntimeException("MD文件上传失败,HTTP状态码: " + response.getStatusCode());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从文件路径创建MultipartFile (JDK 1.8兼容版本)
|
||||
*/
|
||||
private MultipartFile createMultipartFileFromPath(Path filePath) throws IOException {
|
||||
File file = filePath.toFile();
|
||||
String fileName = file.getName();
|
||||
String originalFileName = fileName.endsWith(".md") ? fileName : fileName + ".md";
|
||||
|
||||
// JDK 1.8兼容的文件读取方式
|
||||
byte[] content = readFileToByteArray(file);
|
||||
|
||||
return new SimpleMultipartFile(
|
||||
"file",
|
||||
originalFileName,
|
||||
"text/markdown",
|
||||
content
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* JDK 1.8兼容的文件读取方法
|
||||
*/
|
||||
private byte[] readFileToByteArray(File file) throws IOException {
|
||||
try (FileInputStream fis = new FileInputStream(file);
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
|
||||
byte[] buffer = new byte[8192];
|
||||
int bytesRead;
|
||||
while ((bytesRead = fis.read(buffer)) != -1) {
|
||||
baos.write(buffer, 0, bytesRead);
|
||||
}
|
||||
return baos.toByteArray();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 简单的MultipartFile实现类 (JDK 1.8兼容)
|
||||
*/
|
||||
private static class SimpleMultipartFile implements MultipartFile {
|
||||
private final String name;
|
||||
private final String originalFilename;
|
||||
private final String contentType;
|
||||
private final byte[] content;
|
||||
|
||||
public SimpleMultipartFile(String name, String originalFilename, String contentType, byte[] content) {
|
||||
this.name = name;
|
||||
this.originalFilename = originalFilename;
|
||||
this.contentType = contentType;
|
||||
this.content = content;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getOriginalFilename() {
|
||||
return originalFilename;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getContentType() {
|
||||
return contentType;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return content.length == 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSize() {
|
||||
return content.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getBytes() throws IOException {
|
||||
return content;
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputStream getInputStream() throws IOException {
|
||||
return new ByteArrayInputStream(content);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transferTo(File dest) throws IOException, IllegalStateException {
|
||||
try (FileOutputStream fos = new FileOutputStream(dest)) {
|
||||
fos.write(content);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建文档上传请求
|
||||
*/
|
||||
private DocumentUploadReq createDocumentUploadRequest(String datasetId) {
|
||||
DocumentUploadReq request = new DocumentUploadReq();
|
||||
request.setDatasetId(datasetId);
|
||||
request.setDeepAnalysis(false); // MD文件不需要深度解析
|
||||
request.setIndexingTechnique("high_quality");
|
||||
request.setDocLanguage("chinese");
|
||||
|
||||
// 设置处理规则
|
||||
DocumentUploadReq.ProcessRule processRule = new DocumentUploadReq.ProcessRule();
|
||||
processRule.setMode("automatic");
|
||||
request.setProcessRule(processRule);
|
||||
|
||||
// 设置检索模式(默认混合检索)
|
||||
RetrievalModel retrievalModel = new RetrievalModel();
|
||||
retrievalModel.setSearchMethod("hybrid_search");
|
||||
retrievalModel.setRerankingEnable(true);
|
||||
RetrievalModel.RerankingModel rerankingModel = new RetrievalModel.RerankingModel();
|
||||
rerankingModel.setRerankingModelName("bge-reanker-v2-m3");
|
||||
rerankingModel.setRerankingProviderName("langgenius/huggingface_tei/huggingface_tei");
|
||||
retrievalModel.setTopK(3);
|
||||
retrievalModel.setRerankingModel(rerankingModel);
|
||||
retrievalModel.setScoreThresholdEnabled(false);
|
||||
retrievalModel.setScoreThreshold(0.5f);
|
||||
request.setRetrievalModel(retrievalModel);
|
||||
|
||||
return request;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue