新增深度解析接口

This commit is contained in:
wenjinbo 2025-07-29 15:53:14 +08:00
parent 9ccaa56da3
commit 6ef06e12a9
7 changed files with 642 additions and 71 deletions

View File

@ -2,17 +2,17 @@
# 此文件修改后需要重启项目
NODE_ENV=development
#VUE_APP_BASE_URL='/vab-mock-server'
VUE_APP_BASE_URL='http://localhost:10001'
VUE_APP_API_BASE_URL='http://localhost:8080'
# VUE_APP_BASE_URL='http://localhost:10001'
# VUE_APP_API_BASE_URL='http://localhost:8080'
#北京服务器配置
# VUE_APP_BASE_URL='http://192.168.1.211:80/brichat'
# VUE_APP_API_BASE_URL='http://192.168.1.211:80/brichat'
# #武汉公司服务器
# VUE_APP_BASE_URL='http://192.168.8.253:80/brichat'
# VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat'
VUE_APP_BASE_URL='http://192.168.8.253:80/brichat'
VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat'
#总部服务器配置
# VUE_APP_BASE_URL='http://192.168.0.33:80/brichat'
# VUE_APP_API_BASE_URL='http://192.168.0.33:80/brichat'
VUE_APP_GITHUB_USER_NAME=test
VUE_APP_SECRET_KEY=preview
# VUE_APP_GITHUB_USER_NAME=test
# VUE_APP_SECRET_KEY=preview

View File

@ -2,18 +2,18 @@
# 此文件修改后需要重启项目
NODE_ENV=production
# VUE_APP_BASE_URL='/vab-mock-server'
VUE_APP_BASE_URL='http://localhost:10001'
VUE_APP_API_BASE_URL='http://localhost:10001'
# VUE_APP_BASE_URL='http://localhost:10001'
# VUE_APP_API_BASE_URL='http://localhost:10001'
#北京服务器
# VUE_APP_BASE_URL='http://192.168.1.211:80/brichat'
# VUE_APP_API_BASE_URL='http://192.168.1.211:80/brichat'
#武汉公司服务器
# VUE_APP_BASE_URL='http://192.168.8.253:80/brichat'
# VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat'
VUE_APP_BASE_URL='http://192.168.8.253:80/brichat'
VUE_APP_API_BASE_URL='http://192.168.8.253:80/brichat'
# 总部服务器
# VUE_APP_BASE_URL='http://192.168.0.33:80/brichat'
# VUE_APP_API_BASE_URL='http://192.168.0.33:80/brichat'
VUE_APP_GITHUB_USER_NAME=test
VUE_APP_SECRET_KEY=preview
# VUE_APP_GITHUB_USER_NAME=test
# VUE_APP_SECRET_KEY=preview

View File

@ -138,4 +138,14 @@ export const deleteDataset = (id: string) => {
datasetId
}
})
}
/**
 * Fetch the deep-analysis task list.
 *
 * Sends a GET request to
 * `/brichat-service/datasetManage/document/deepAnalysisList` through the
 * shared `request` helper and returns that helper's result unchanged.
 */
export const getDeepAnalysisList = () =>
  request({
    method: 'get',
    url: '/brichat-service/datasetManage/document/deepAnalysisList'
  })

View File

@ -295,6 +295,7 @@ export default {
download: 'Download',
rename: 'Rename',
delete: 'Delete',
deepAnalysis: 'Deep Analysis',
},
search: {
placeholder: 'Enter keywords to search',
@ -314,6 +315,8 @@ export default {
segmentation: 'Segmentation Rules',
separatorPlaceholder: 'Separator (default: ###)',
maxTokens: 'Max Tokens',
deepAnalysis: 'Deep Analysis',
deepAnalysisOption: 'Enable deep analysis to improve document understanding accuracy',
selectFile: 'Select the file',
fileTip:
'Multiple selection supported, max 100MB per file. Accepted formats: TXT, MD, MARKDOWN, MDX, PDF, HTML, HTM, XLSX, XLS, DOCX, CSV, VTT, PROPERTIES',
@ -331,6 +334,14 @@ export default {
confirm: 'Confirm',
deleteconfirm: 'Delete Confirm',
},
deepAnalysisDialog: {
title: 'Deep Analysis Task List',
noTasks: 'No deep analysis tasks are currently in progress',
datasetName: 'Dataset',
taskId: 'Task ID',
createTime: 'Create Time',
close: 'Close',
},
messages: {
FilenamecantEmpty: 'Filename cannot be empty',
uploadSuccess: 'Upload successfully',
@ -359,6 +370,7 @@ export default {
fetchFailed: 'Failed to get document list: ',
previewFailed: 'Preview failed: ',
downloadFailed: 'Download failed: ',
fetchDeepAnalysisFailed: 'Failed to get deep analysis task list',
},
},
},
@ -515,70 +527,35 @@ export default {
Tip: 'Tip',
selectDeleteData: 'Please select the data to be deleted',
confirmDeleteApiKeys: 'This operation will permanently delete selected ',
confirmDeleteApiKeysEnd: ' API key, are you sure?',
confirmDeleteApiKeysEndPlural: ' API keys, are you sure?',
confirmDeleteApiKeysEnd:' API key, are you sure?',
confirmDeleteApiKeysEndPlural:' API keys, are you sure?',
confirmRefreshCache: 'This operation will refresh Redis cache, reloading all API keys, are you sure?',
updateSuccess: 'Update Successfully',
updateFail: 'Update Failed',
addSuccess: 'Add Successfully',
addFail: 'Add Failed',
addFail:'Add Failed'
},
prologue: {
title: 'Opening Statement Management',
query: 'Query',
save: 'Save',
prologue:{
chatType: 'Chat Type',
chatTypes: {
'1': 'Fault Diagnosis',
'2': 'Intelligent Q&A',
'3': 'Chart Report',
'4': 'Emergency Assistant Q&A',
'5': 'Diagnostic Code Lookup',
},
openingContent: 'Opening Statement Content',
addRecommend: 'Add Recommended Question',
recommendation: 'Recommended Question',
action: 'Action',
selectChatType: 'Select Chat Type',
prologueContent: 'Prologue Content',
addRecommend: 'Add Recommendation',
recommendQuestion: 'Recommended Question',
actions: 'Actions',
edit: 'Edit',
delete: 'Delete',
dialog: {
addTitle: 'Add Recommended Question',
editTitle: 'Edit Recommended Question',
questionContent: 'Recommended Question',
cancel: 'Cancel',
confirm: 'Confirm',
save: 'Save',
},
message: {
loadSuccess: 'Loaded successfully',
loadFailed: 'Load failed, please try again later',
loadFailedMess: 'Failed to load recommended questions',
notFound: 'Opening statement not found',
saveSuccess: 'Saved successfully',
saveFailed: 'Save failed, please try again later',
addSuccess: 'Recommended question added successfully',
addFailed: 'Failed to add recommended question',
editSuccess: 'Recommended question edited successfully',
editFailed: 'Failed to edit recommended question, please try again later',
deleteSuccess: 'Deleted successfully',
deleteFailed: 'Deletion failed',
},
confirm: {
save: 'Are you sure you want to save the changes?',
tips: 'Tips',
delete: 'Are you sure you want to delete this recommended question?',
deleteConfirm: 'Delete Confirmation',
},
placeholder: {
selectType: 'Select chat type',
},
},
vabtabs: {
refresh: 'Refresh',
closeOthers: 'Close Others',
closeLeft: 'Close Left',
closeRight: 'Close Right',
closeAll: 'Close All',
},
confirmDelete: 'Are you sure you want to delete this recommendation?',
confirmSave: 'Are you sure you want to save the changes?',
deleteSuccess: 'Deleted successfully',
deleteFailure: 'Failed to delete, please try again later',
saveSuccess: 'Saved successfully',
saveFailure: 'Failed to save, please try again later',
loadSuccess: 'Loaded successfully',
loadFailure: 'Failed to load, please try again later',
noContent: 'No content found',
loading: 'Loading...',
updateRecommendation: 'Update Recommendation',
addRecommendation: 'Add Recommendation',
}
},
}

View File

@ -303,6 +303,7 @@ export default {
download: '下载',
rename: '重命名',
delete: '删除',
deepAnalysis: '深度解析',
},
search: {
placeholder: '请输入关键词查询',
@ -322,6 +323,8 @@ export default {
segmentation: '分段规则',
separatorPlaceholder: '分隔符(默认###',
maxTokens: '最大Token数',
deepAnalysis: '深度解析',
deepAnalysisOption: '启用深度解析,提高文档理解精度',
selectFile: '选择文件',
fileTip: '支持多选单个文件不超过100MB可接受格式TXT、MD、MARKDOWN、MDX、PDF、HTML、HTM、XLSX、XLS、DOCX、CSV、VTT、PROPERTIES',
cancel: '取消',
@ -338,6 +341,14 @@ export default {
confirm: '确定',
deleteconfirm: '删除确认',
},
deepAnalysisDialog: {
title: '深度解析任务列表',
noTasks: '当前没有正在进行的深度解析任务',
datasetName: '知识库',
taskId: '任务ID',
createTime: '创建时间',
close: '关闭',
},
messages: {
FilenamecantEmpty: '文件名不能为空',
uploadSuccess: '上传成功',
@ -367,6 +378,7 @@ export default {
fetchFailed: '获取文档列表失败: ',
previewFailed: '预览失败: ',
downloadFailed: '下载失败: ',
fetchDeepAnalysisFailed: '获取深度解析任务列表失败',
},
},
},

View File

@ -37,6 +37,7 @@
{{t('vabI18n.knowledge.document.buttons.batchDelete', { count: selectedRows.length })}}
</el-button>
<el-button :icon="Refresh" @click="handleRefresh">{{t('vabI18n.knowledge.document.buttons.refresh')}}</el-button>
<el-button type="info" @click="handleDeepAnalysis">{{t('vabI18n.knowledge.document.buttons.deepAnalysis')}}</el-button>
<el-button type="primary" :icon="Upload" @click="triggerFileInput">{{t('vabI18n.knowledge.document.buttons.upload')}}</el-button>
</div>
</div>
@ -186,6 +187,12 @@
</div>
</el-form-item>
<el-form-item :label="t('vabI18n.knowledge.document.uploadDialog.deepAnalysis')" label-width="auto">
<el-checkbox v-model="uploadForm.deepAnalysis">
{{t('vabI18n.knowledge.document.uploadDialog.deepAnalysisOption')}}
</el-checkbox>
</el-form-item>
<el-upload
ref="uploadRef"
multiple
@ -267,6 +274,38 @@
<el-button type="primary" @click="handleRenameConfirm">{{t('vabI18n.knowledge.document.renameDialog.confirm')}}</el-button>
</template>
</el-dialog>
<!--
  Deep-analysis task list dialog.
  Visibility is bound to `deepAnalysisDialogVisible`; the body shows a loading
  mask while `deepAnalysisLoading` is true, an empty placeholder when the list
  is empty and not loading, and otherwise one card per entry of `deepAnalysisList`.
-->
<el-dialog v-model="deepAnalysisDialogVisible" :title="t('vabI18n.knowledge.document.deepAnalysisDialog.title')" width="800px" class="deep-analysis-dialog">
<div v-loading="deepAnalysisLoading" :element-loading-text="t('vabI18n.knowledge.document.messages.loading')">
<div v-if="deepAnalysisList.length === 0 && !deepAnalysisLoading" class="empty-state">
<el-empty :description="t('vabI18n.knowledge.document.deepAnalysisDialog.noTasks')" />
</div>
<div v-else class="task-list">
<!-- One card per task, keyed by its task id -->
<div v-for="task in deepAnalysisList" :key="task.taskId" class="task-item">
<div class="task-header">
<div class="task-info">
<h4 class="task-name">{{ task.name }}</h4>
<p class="task-dataset">{{t('vabI18n.knowledge.document.deepAnalysisDialog.datasetName')}}: {{ task.datasetName }}</p>
<p class="task-id">{{t('vabI18n.knowledge.document.deepAnalysisDialog.taskId')}}: {{ task.taskId }}</p>
<p class="task-time">{{t('vabI18n.knowledge.document.deepAnalysisDialog.createTime')}}: {{ formatTimestampToLocaleString(task.createTime) }}</p>
</div>
<div class="task-progress">
<!--
  `task.percent` is multiplied by 100 here, i.e. it is treated as a 0..1 fraction.
  NOTE(review): confirm the backend really reports a fraction and not a 0..100 value.
-->
<el-progress
:percentage="Math.round(task.percent * 100)"
:status="task.percent >= 1 ? 'success' : 'warning'"
:stroke-width="8"
/>
</div>
</div>
</div>
</div>
</div>
<!-- Footer: manual refresh (re-fetches the list) and close -->
<template #footer>
<el-button @click="fetchDeepAnalysisList" :loading="deepAnalysisLoading">{{t('vabI18n.knowledge.document.buttons.refresh')}}</el-button>
<el-button type="primary" @click="deepAnalysisDialogVisible = false">{{t('vabI18n.knowledge.document.deepAnalysisDialog.close')}}</el-button>
</template>
</el-dialog>
</div>
</template>
@ -274,7 +313,7 @@
import { useRoute } from 'vue-router'
import { ref, reactive } from 'vue'
import VueOfficePdf from '@vue-office/pdf'
import { getDatasetDocPage, uploadDocument, deleteDocument, downloadDocument, previewDocumentUrl, renameDocument} from '@/api/dataset'
import { getDatasetDocPage, uploadDocument, deleteDocument, downloadDocument, previewDocumentUrl, renameDocument, getDeepAnalysisList } from '@/api/dataset'
//VueOfficeDocx
import VueOfficeDocx from '@vue-office/docx'
//
@ -328,7 +367,8 @@ const uploadForm = reactive({
indexingTechnique: 'high_quality',
preProcessingRules: ['remove_extra_spaces', 'remove_urls_emails'],
segmentSeparator: '###',
segmentMaxTokens: 500
segmentMaxTokens: 500,
deepAnalysis: false
})
//
@ -338,6 +378,21 @@ const renameForm = reactive({
newName: ''
})
// Shape of one deep-analysis task as rendered in the task-list dialog.
interface PdfTask {
  // Title shown on the task card
  name: string
  // Identifier of the task; also used as the v-for key
  taskId: string
  // Progress value; the template multiplies it by 100 before display
  percent: number
  // Name of the dataset (knowledge base) shown on the card
  datasetName: string
  // Creation time; formatTimestampToLocaleString multiplies it by 1000, i.e. treats it as seconds
  createTime: number
}

// State backing the deep-analysis dialog: task entries, loading flag, visibility.
const deepAnalysisList = ref<PdfTask[]>([])
const deepAnalysisLoading = ref(false)
const deepAnalysisDialogVisible = ref(false)
const getFileTypeIcon = (fileType: string) => {
// 使
// const getIconUrl = (iconName: string) => {
@ -625,6 +680,7 @@ const handleUpload = async () => {
datasetId: datasetId.value,
indexingTechnique: uploadForm.indexingTechnique,
processRule: processRule,
deepAnalysis: uploadForm.deepAnalysis
})], {
type: 'application/json'
}))
@ -806,6 +862,34 @@ const handleRefresh = () => {
fetchDocuments()
}
// Click handler for the "deep analysis" toolbar button:
// shows the dialog first, then loads the task list into it.
const handleDeepAnalysis = async (): Promise<void> => {
  deepAnalysisDialogVisible.value = true
  await fetchDeepAnalysisList()
}
/**
 * Load the deep-analysis task list into `deepAnalysisList`.
 *
 * Toggles `deepAnalysisLoading` around the request. On failure the error is
 * logged and surfaced through an error notification; the previous list is
 * left untouched. Never rejects.
 */
const fetchDeepAnalysisList = async () => {
  deepAnalysisLoading.value = true
  try {
    const { data } = await getDeepAnalysisList()
    // Only accept a real array: any other truthy payload would defeat the
    // `length === 0` empty-state check and make v-for iterate object keys.
    deepAnalysisList.value = Array.isArray(data) ? data : []
  } catch (error) {
    console.error('获取深度解析任务列表失败:', error)
    ElNotification({
      title: t('vabI18n.knowledge.document.errors.fetchDeepAnalysisFailed'),
      message: error instanceof Error ? error.message : t('vabI18n.knowledge.document.messages.NoKnowError'),
      type: 'error'
    })
  } finally {
    // Always clear the loading flag, whether the request succeeded or failed
    deepAnalysisLoading.value = false
  }
}
/**
 * Format a Unix timestamp given in seconds as a locale-dependent date/time string.
 *
 * @param timestamp seconds since the Unix epoch
 * @returns the result of `Date.prototype.toLocaleString()` for that instant
 */
const formatTimestampToLocaleString = (timestamp: number): string => {
  const millis = timestamp * 1000 // Date expects milliseconds
  return new Date(millis).toLocaleString()
}
//
const formatTimestamp = (timestamp: number): string => {
const date = new Date(timestamp * 1000)
@ -1249,6 +1333,83 @@ const handleSearch = () => {
to { transform: rotate(360deg); }
}
// Deep-analysis dialog: gradient header with a white, semi-bold title.
// `:deep()` replaces the `::v-deep` combinator, which is deprecated in Vue 3
// SFC styles; the generated selectors are the same.
// NOTE(review): assumes this <style> block is scoped — confirm.
.deep-analysis-dialog {
  :deep(.el-dialog__header) {
    background: linear-gradient(135deg, #409eff 0%, #67c23a 100%);
    color: white;
    border-radius: 8px 8px 0 0;
  }

  :deep(.el-dialog__title) {
    color: white;
    font-weight: 600;
  }
}
// Centered placeholder shown when there are no tasks to list.
.empty-state {
  text-align: center;
  padding: 40px 20px;
}
// Scrollable container for the task cards (capped at 400px tall).
.task-list {
  padding: 10px 0;
  max-height: 400px;
  overflow-y: auto;
}
// A single task card; lifts slightly and gains a shadow on hover.
.task-item {
  margin-bottom: 12px;
  padding: 16px;
  border: 1px solid #ebeef5;
  border-radius: 8px;
  background: #fafafa;
  transition: all 0.3s ease;

  &:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
  }
}
// Row layout inside a card: task info on the left, progress bar on the right.
.task-header {
  display: flex;
  align-items: flex-start;
  justify-content: space-between;
  gap: 20px;
}
// Text column of a task card: title plus three small metadata lines.
.task-info {
  flex: 1;

  .task-name {
    margin: 0 0 8px;
    color: #303133;
    font-size: 16px;
    font-weight: 600;
    word-break: break-all;
  }

  // Shared look of the metadata lines
  .task-dataset,
  .task-id,
  .task-time {
    margin: 4px 0;
    color: #909399;
    font-size: 12px;
  }

  // Must stay after the grouped rule: it overrides the shared grey colour
  .task-dataset {
    color: #67c23a;
    font-weight: 500;
  }
}
// Fixed-width (200px) column that holds the progress bar.
// `:deep()` replaces the deprecated `::v-deep` combinator so the rule still
// reaches the text node rendered inside Element Plus' progress component.
// NOTE(review): assumes this <style> block is scoped — confirm.
.task-progress {
  flex: 0 0 200px;

  :deep(.el-progress__text) {
    font-weight: 600;
  }
}
//
@media (max-width: 768px) {
.main-container {
@ -1274,5 +1435,14 @@ const handleSearch = () => {
flex-direction: column;
gap: 4px;
}
.task-header {
flex-direction: column;
gap: 12px;
}
.task-progress {
flex: 1;
}
}
</style>

View File

@ -0,0 +1,402 @@
package com.bjtds.brichat.service.task;
import com.bjtds.brichat.entity.dataset.DocumentUploadReq;
import com.bjtds.brichat.entity.dataset.RetrievalModel;
import com.bjtds.brichat.entity.dto.PdfTaskDto;
import com.bjtds.brichat.entity.dto.PdfTaskStatusResponse;
import com.bjtds.brichat.service.dify.DifyDatasetApiService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpMethod;
import org.springframework.http.ResponseEntity;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.util.StringUtils;
import org.springframework.web.client.RestTemplate;
import org.springframework.web.multipart.MultipartFile;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
/**
 * Scheduled service that polls PDF conversion tasks.
 */
@Service
public class PdfConversionTaskService {
private static final Logger logger = LoggerFactory.getLogger(PdfConversionTaskService.class);
private static final String PDF_TASK_REDIS_KEY = "pdf:conversion:tasks";
@Autowired
private RestTemplate restTemplate;
@Autowired
@Qualifier("redisTemplate")
private RedisTemplate<String, Object> redisTemplate;
@Autowired
private DifyDatasetApiService difyDatasetApiService;
@Value("${pdf.conversion.service.url}")
private String pdfConversionServiceUrl;
/**
 * Scheduled job: every 10 seconds, checks the status of all queued PDF conversion tasks.
 *
 * <p>Reads every task id from the Redis list {@code PDF_TASK_REDIS_KEY + ":list"} and
 * checks them one by one. A failure while checking one task is logged and does not
 * stop the remaining tasks; any other failure is logged and ends this run.
 */
@Scheduled(fixedRate = 10000) // runs every 10 seconds
public void checkPdfConversionTasks() {
    try {
        // Snapshot of all pending task ids
        final String listKey = PDF_TASK_REDIS_KEY + ":list";
        List<Object> pendingIds = redisTemplate.opsForList().range(listKey, 0, -1);

        boolean nothingQueued = pendingIds == null || pendingIds.isEmpty();
        if (nothingQueued) {
            logger.debug("没有待处理的PDF转换任务");
            return;
        }

        logger.info("开始检查PDF转换任务状态共{}个任务", pendingIds.size());

        for (Object rawId : pendingIds) {
            String currentId = rawId.toString();
            try {
                checkSingleTask(currentId);
            } catch (Exception taskError) {
                // Isolate failures so one bad task cannot block the others
                logger.error("检查任务{}状态时发生错误: {}", currentId, taskError.getMessage(), taskError);
            }
        }
    } catch (Exception runError) {
        logger.error("定时检查PDF转换任务时发生错误: {}", runError.getMessage(), runError);
    }
}
/**
 * Checks one conversion task and dispatches on its remote status.
 *
 * <p>Loads the {@code taskInfo} entry from the task's Redis hash; when it is missing,
 * the task id is removed from the queue. Otherwise the status is queried and handled:
 * {@code SUCCESS} and {@code FAILURE} go to their handlers, {@code STARTED} and
 * {@code PENDING} update the stored progress, and anything else (including a missing
 * status) is logged as unknown. Exceptions are logged here and never propagated.
 *
 * @param taskId id of the task to check
 */
private void checkSingleTask(String taskId) {
    try {
        // Load the task details from Redis
        String hashKey = PDF_TASK_REDIS_KEY + ":" + taskId;
        PdfTaskDto taskInfo = (PdfTaskDto) redisTemplate.opsForHash().get(hashKey, "taskInfo");
        if (taskInfo == null) {
            logger.warn("任务{}的信息在Redis中不存在从队列中移除", taskId);
            removeTaskFromQueue(taskId);
            return;
        }

        // Ask the conversion service for the current status
        PdfTaskStatusResponse statusResponse = queryTaskStatus(taskId);
        if (statusResponse == null) {
            logger.warn("无法获取任务{}的状态信息", taskId);
            return;
        }

        String status = statusResponse.getStatus();
        logger.info("任务{}状态: {}, 进度: {}", taskId, status,
                statusResponse.getProgress() != null ? statusResponse.getProgress().toString() : "无进度信息");

        // A switch on a null String throws NullPointerException; treat a missing
        // status like any other unknown status instead of failing the check.
        if (status == null) {
            logger.warn("任务{}状态未知: {}", taskId, status);
            return;
        }

        // Dispatch on the reported status
        switch (status) {
            case "SUCCESS":
                handleSuccessTask(taskInfo, statusResponse);
                break;
            case "FAILURE":
                handleFailedTask(taskInfo, statusResponse);
                break;
            case "STARTED":
            case "PENDING":
                updateTaskProgress(taskInfo, statusResponse);
                break;
            default:
                logger.warn("任务{}状态未知: {}", taskId, status);
                break;
        }
    } catch (Exception e) {
        logger.error("检查任务{}时发生错误: {}", taskId, e.getMessage(), e);
    }
}
/**
 * Queries the conversion service for the status of a task.
 *
 * <p>Issues {@code GET {pdfConversionServiceUrl}/v1/pdf2md/status/{taskId}}. The task id
 * is passed as a URI template variable rather than concatenated into the URL, so it is
 * encoded and any brace characters in it cannot be mistaken for template placeholders.
 *
 * @param taskId id of the task to look up
 * @return the deserialized response body, or {@code null} when the request fails
 */
private PdfTaskStatusResponse queryTaskStatus(String taskId) {
    String urlTemplate = pdfConversionServiceUrl + "/v1/pdf2md/status/{taskId}";

    HttpHeaders headers = new HttpHeaders();
    HttpEntity<?> requestEntity = new HttpEntity<>(headers);

    try {
        ResponseEntity<PdfTaskStatusResponse> response = restTemplate.exchange(
                urlTemplate,
                HttpMethod.GET,
                requestEntity,
                PdfTaskStatusResponse.class,
                taskId
        );
        return response.getBody();
    } catch (Exception e) {
        // Best effort: the caller treats null as "status unavailable" and retries next run
        logger.error("查询任务{}状态失败: {}", taskId, e.getMessage());
        return null;
    }
}
/**
 * Finalizes a task whose conversion finished successfully.
 *
 * <p>Logs the task details. When the response carries a non-blank result (used as the
 * path of the generated Markdown file), that file is uploaded to the Dify dataset;
 * an upload failure is logged and not rethrown. The task is removed from the queue
 * in every case.
 *
 * @param taskInfo       task details loaded from Redis
 * @param statusResponse status response reporting the success
 */
private void handleSuccessTask(PdfTaskDto taskInfo, PdfTaskStatusResponse statusResponse) {
    final String taskId = taskInfo.getTaskId();
    final String mdPath = statusResponse.getResult();

    logger.info("=== PDF转换任务完成 ===");
    logger.info("任务ID: {}", taskId);
    logger.info("文件名: {}", taskInfo.getName());
    logger.info("数据集ID: {}", taskInfo.getDatasetId());
    if (statusResponse.getProgress() != null) {
        logger.info("转换进度: {}", statusResponse.getProgress().toString());
    }

    if (!StringUtils.hasText(mdPath)) {
        logger.warn("任务{}转换完成但结果为空", taskId);
    } else {
        logger.info("=== 转换结果开始 ===");
        logger.info("MD文件路径: {}", mdPath);
        logger.info("=== 转换结果结束 ===");

        // Push the generated Markdown file into the Dify dataset
        try {
            uploadMdFileToDifyDataset(taskInfo, mdPath);
            logger.info("MD文件已成功上传到知识库");
        } catch (Exception uploadError) {
            logger.error("上传MD文件到知识库失败: {}", uploadError.getMessage(), uploadError);
        }
    }

    // The task leaves the queue whether or not the upload succeeded
    removeTaskFromQueue(taskId);
    logger.info("任务{}已完成并从队列中移除", taskId);
}
/**
 * Handles a task the conversion service reported as failed: logs the task details
 * (and the error text when present) and removes the task from the queue.
 *
 * @param taskInfo       task details loaded from Redis
 * @param statusResponse status response reporting the failure
 */
private void handleFailedTask(PdfTaskDto taskInfo, PdfTaskStatusResponse statusResponse) {
    final String failedId = taskInfo.getTaskId();

    logger.error("=== PDF转换任务失败 ===");
    logger.error("任务ID: {}", failedId);
    logger.error("文件名: {}", taskInfo.getName());
    logger.error("数据集ID: {}", taskInfo.getDatasetId());

    boolean hasErrorDetail = statusResponse.getError() != null;
    if (hasErrorDetail) {
        logger.error("错误信息: {}", statusResponse.getError());
    }

    // Failed tasks are dropped from the queue
    removeTaskFromQueue(failedId);
    logger.info("失败任务{}已从队列中移除", failedId);
}
/**
 * Stores the latest progress of a running task.
 *
 * <p>When the response carries progress information, its percent value is copied onto
 * {@code taskInfo} and the object is written back to the task's Redis hash under the
 * {@code taskInfo} field. Does nothing when no progress is reported.
 *
 * <p>NOTE(review): the debug message appends a percent sign to the value; confirm whether
 * the service reports 0..100 or a 0..1 fraction.
 *
 * @param taskInfo       task details loaded from Redis (mutated)
 * @param statusResponse status response of a running task
 */
private void updateTaskProgress(PdfTaskDto taskInfo, PdfTaskStatusResponse statusResponse) {
    String taskId = taskInfo.getTaskId();

    if (statusResponse.getProgress() == null) {
        return;
    }

    Double latestPercent = statusResponse.getProgress().getPercent();
    taskInfo.setPercent(latestPercent);

    // Persist the refreshed task info
    redisTemplate.opsForHash().put(PDF_TASK_REDIS_KEY + ":" + taskId, "taskInfo", taskInfo);

    logger.debug("任务{}进度更新: {}%", taskId, String.format("%.1f", latestPercent));
}
/**
 * Removes a task from Redis: drops one occurrence of its id from the task list and
 * deletes the hash that holds its details. Errors are logged, not propagated.
 *
 * @param taskId id of the task to remove
 */
private void removeTaskFromQueue(String taskId) {
    final String listKey = PDF_TASK_REDIS_KEY + ":list";
    final String detailKey = PDF_TASK_REDIS_KEY + ":" + taskId;
    try {
        // count = 1: remove the first matching entry of the list
        redisTemplate.opsForList().remove(listKey, 1, taskId);

        // Drop the per-task details
        redisTemplate.delete(detailKey);

        logger.debug("任务{}已从Redis队列中移除", taskId);
    } catch (Exception removeError) {
        logger.error("移除任务{}时发生错误: {}", taskId, removeError.getMessage(), removeError);
    }
}
/**
 * Uploads a converted Markdown file to the Dify dataset of the task.
 *
 * @param taskInfo   task details; supplies the upload request and dataset id
 * @param mdFilePath path of the Markdown file on the local file system
 * @throws Exception when the file does not exist, reading it fails, or the upload
 *                   does not return a 2xx status
 */
private void uploadMdFileToDifyDataset(PdfTaskDto taskInfo, String mdFilePath) throws Exception {
    logger.info("开始上传MD文件到知识库: filePath={}, datasetId={}", mdFilePath, taskInfo.getDatasetId());

    // Step 1: the file must exist
    Path mdPath = Paths.get(mdFilePath);
    boolean missing = !Files.exists(mdPath);
    if (missing) {
        throw new RuntimeException("MD文件不存在: " + mdFilePath);
    }

    // Step 2: wrap it as a MultipartFile
    MultipartFile upload = createMultipartFileFromPath(mdPath);

    // Step 3: send it using the upload request stored with the task
    ResponseEntity<Map> response = difyDatasetApiService.createDocumentByFile(taskInfo.getUploadReq(), upload);

    if (!response.getStatusCode().is2xxSuccessful()) {
        throw new RuntimeException("MD文件上传失败HTTP状态码: " + response.getStatusCode());
    }
    logger.info("MD文件上传成功: taskId={}, fileName={}, response={}",
            taskInfo.getTaskId(), taskInfo.getName(), response.getBody());
}
/**
 * Builds an in-memory {@link MultipartFile} from a file on disk.
 *
 * <p>The part name is {@code file}, the content type {@code text/markdown}, and the
 * original filename is the file's name with {@code .md} appended when it does not
 * already end with it.
 *
 * @param filePath path of the file to wrap
 * @return a MultipartFile holding the full file content
 * @throws IOException when the file cannot be read
 */
private MultipartFile createMultipartFileFromPath(Path filePath) throws IOException {
    File source = filePath.toFile();

    String uploadName = source.getName();
    if (!uploadName.endsWith(".md")) {
        uploadName = uploadName + ".md";
    }

    // Whole file is loaded into memory
    byte[] payload = readFileToByteArray(source);

    return new SimpleMultipartFile("file", uploadName, "text/markdown", payload);
}
/**
 * Reads the whole content of a file into a byte array.
 *
 * <p>Delegates to {@link Files#readAllBytes(java.nio.file.Path)}, which is available
 * since Java 7 and replaces the previous hand-written buffer loop.
 *
 * @param file file to read
 * @return the file's bytes (empty array for an empty file)
 * @throws IOException when the file does not exist or cannot be read
 */
private byte[] readFileToByteArray(File file) throws IOException {
    return Files.readAllBytes(file.toPath());
}
/**
 * Minimal in-memory {@link MultipartFile} backed by a byte array.
 *
 * <p>The array passed to the constructor is stored and returned as-is (no copy).
 */
private static class SimpleMultipartFile implements MultipartFile {

    private final String partName;
    private final String uploadFilename;
    private final String mimeType;
    private final byte[] payload;

    /**
     * @param name             form field name of the part
     * @param originalFilename filename reported for the upload
     * @param contentType      MIME type reported for the upload
     * @param content          file content
     */
    public SimpleMultipartFile(String name, String originalFilename, String contentType, byte[] content) {
        this.partName = name;
        this.uploadFilename = originalFilename;
        this.mimeType = contentType;
        this.payload = content;
    }

    @Override
    public String getName() {
        return this.partName;
    }

    @Override
    public String getOriginalFilename() {
        return this.uploadFilename;
    }

    @Override
    public String getContentType() {
        return this.mimeType;
    }

    @Override
    public boolean isEmpty() {
        return this.payload.length == 0;
    }

    @Override
    public long getSize() {
        return this.payload.length;
    }

    @Override
    public byte[] getBytes() throws IOException {
        return this.payload;
    }

    @Override
    public InputStream getInputStream() throws IOException {
        return new ByteArrayInputStream(this.payload);
    }

    /** Writes the content to {@code dest}. */
    @Override
    public void transferTo(File dest) throws IOException, IllegalStateException {
        try (FileOutputStream target = new FileOutputStream(dest)) {
            target.write(this.payload);
        }
    }
}
/**
 * Builds a default document upload request for a dataset: deep analysis off,
 * {@code high_quality} indexing, {@code chinese} document language, {@code automatic}
 * process rule, and hybrid search with reranking enabled (top-k 3, score threshold
 * disabled but set to 0.5).
 *
 * <p>NOTE(review): no caller of this method is visible in this class — confirm it is
 * still needed.
 * <p>NOTE(review): the reranking model name {@code bge-reanker-v2-m3} looks like a typo
 * of {@code bge-reranker-v2-m3}; verify against the model name configured in Dify.
 *
 * @param datasetId id of the target dataset
 * @return the populated request
 */
private DocumentUploadReq createDocumentUploadRequest(String datasetId) {
    // Processing rule
    DocumentUploadReq.ProcessRule rule = new DocumentUploadReq.ProcessRule();
    rule.setMode("automatic");

    // Reranking model used by the retrieval settings
    RetrievalModel.RerankingModel reranker = new RetrievalModel.RerankingModel();
    reranker.setRerankingModelName("bge-reanker-v2-m3");
    reranker.setRerankingProviderName("langgenius/huggingface_tei/huggingface_tei");

    // Retrieval settings: hybrid search by default
    RetrievalModel retrieval = new RetrievalModel();
    retrieval.setSearchMethod("hybrid_search");
    retrieval.setRerankingEnable(true);
    retrieval.setTopK(3);
    retrieval.setRerankingModel(reranker);
    retrieval.setScoreThresholdEnabled(false);
    retrieval.setScoreThreshold(0.5f);

    DocumentUploadReq uploadReq = new DocumentUploadReq();
    uploadReq.setDatasetId(datasetId);
    uploadReq.setDeepAnalysis(false); // Markdown files need no deep analysis
    uploadReq.setIndexingTechnique("high_quality");
    uploadReq.setDocLanguage("chinese");
    uploadReq.setProcessRule(rule);
    uploadReq.setRetrievalModel(retrieval);
    return uploadReq;
}
}