Deep-analysis backend code commit (PDF → Markdown deep parsing)
parent 7a8413cf38
commit 6b0b97d0a6
@@ -3,9 +3,11 @@ package com.bjtds;
 import org.springframework.boot.SpringApplication;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
 import org.springframework.cloud.openfeign.EnableFeignClients;
+import org.springframework.scheduling.annotation.EnableScheduling;
 
 @SpringBootApplication
 @EnableFeignClients
+@EnableScheduling
 public class BriChatServiceApplication {
 
     public static void main(String[] args) {
@@ -2,6 +2,7 @@ package com.bjtds.brichat.controller;
 import cn.hutool.core.io.resource.InputStreamResource;
 import com.bjtds.brichat.entity.dataset.DatasetsDocRenameReq;
 import com.bjtds.brichat.entity.dataset.DocumentUploadReq;
+import com.bjtds.brichat.entity.dto.PdfTaskDto;
 import com.bjtds.brichat.service.DatasetsDocService;
 import com.bjtds.brichat.service.dify.DifyDatasetApiService;
 import com.bjtds.brichat.util.Constants;
@@ -16,8 +17,10 @@ import io.github.guoshiqiufeng.dify.dataset.dto.response.DocumentInfo;
 import io.github.guoshiqiufeng.dify.dataset.dto.response.UploadFileInfoResponse;
 import io.swagger.annotations.Api;
 import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.core.io.ByteArrayResource;
+import org.springframework.data.redis.core.RedisTemplate;
 import org.springframework.http.*;
 import org.springframework.web.bind.annotation.*;
 import org.springframework.web.client.RestTemplate;
@@ -25,6 +28,9 @@ import org.springframework.web.multipart.MultipartFile;
 import javax.annotation.Resource;
 import java.net.URLEncoder;
 import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import org.springframework.http.HttpHeaders;
 import org.springframework.http.HttpStatus;
@@ -59,7 +65,8 @@ public class DatasetDocController {
 
     @Value("${dify.url}")
     private String difyUrl;
+    @Autowired
+    private RedisTemplate<String, Object> redisTemplate;
 
 
     @PostMapping("/page")
@@ -168,6 +175,61 @@ public class DatasetDocController {
     }
 
 
+    /**
+     * Get the list of deep-analysis tasks (PDF → Markdown conversions currently in progress).
+     * Returns a list of PdfTaskDto.
+     *
+     * @return ResultUtils response containing the list of PdfTaskDto
+     */
+    @GetMapping("/deepAnalysisList")
+    public ResultUtils getDeepAnalysisList() {
+        List<PdfTaskDto> taskList = new ArrayList<>();
+
+        try {
+            // 1. Fetch all task IDs
+            List<Object> taskIds = redisTemplate.opsForList().range(Constants.PDF_TASK_REDIS_KEY + ":list", 0, -1);
+
+            if (taskIds == null || taskIds.isEmpty()) {
+                log.info("当前没有正在处理的PDF转换任务");
+                return ResultUtils.success(taskList);
+            }
+
+            log.info("获取到{}个PDF转换任务", taskIds.size());
+
+            // 2. Iterate over the task IDs and load each task's details
+            for (Object taskIdObj : taskIds) {
+                if (taskIdObj == null) {
+                    continue;
+                }
+
+                String taskId = taskIdObj.toString();
+                try {
+                    // 3. Load the task details from Redis by task ID
+                    String hashKey = Constants.PDF_TASK_REDIS_KEY + ":" + taskId;
+                    Object taskInfoObj = redisTemplate.opsForHash().get(hashKey, "taskInfo");
+
+                    if (taskInfoObj instanceof PdfTaskDto) {
+                        PdfTaskDto taskInfo = (PdfTaskDto) taskInfoObj;
+                        taskList.add(taskInfo);
+                        log.debug("获取任务信息成功: taskId={}, name={}, percent={}",
+                                taskId, taskInfo.getName(), taskInfo.getPercent());
+                    } else {
+                        log.warn("任务{}的信息格式不正确或不存在", taskId);
+                    }
+
+                } catch (Exception e) {
+                    log.error("获取任务{}的详细信息失败: {}", taskId, e.getMessage(), e);
+                }
+            }
+
+            log.info("成功获取{}个有效的PDF转换任务信息", taskList.size());
+            return ResultUtils.success(taskList);
+
+        } catch (Exception e) {
+            log.error("获取深度解析任务列表失败: {}", e.getMessage(), e);
+            return ResultUtils.error("获取深度解析任务列表失败: " + e.getMessage());
+        }
+    }
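The new endpoint returns the PdfTaskDto entries read back from Redis. The DTO class itself is not included in this diff; based on the builder calls and getters used elsewhere in the commit (name, taskId, percent, datasetId, datasetName, uploadReq), a minimal Lombok-based sketch might look like the following — the exact types, annotations, and progress scale are assumptions:

// Hypothetical sketch of PdfTaskDto, inferred from its usage in this commit.
package com.bjtds.brichat.entity.dto;

import com.bjtds.brichat.entity.dataset.DocumentUploadReq;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.io.Serializable;

@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class PdfTaskDto implements Serializable {
    private String name;                 // original file name of the uploaded PDF
    private String taskId;               // task id returned by the conversion service
    private Double percent;              // conversion progress (initialised to 0.0 in the commit)
    private String datasetId;            // target Dify dataset id
    private String datasetName;          // target Dify dataset name
    private DocumentUploadReq uploadReq; // original upload request, replayed once conversion finishes
}

Because the controller casts objects fetched from RedisTemplate<String, Object> directly to PdfTaskDto, the class (or the configured Redis serializer) must round-trip these instances; the Serializable marker above reflects that assumption.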
@@ -17,6 +17,8 @@ public class DocumentUploadReq implements Serializable {
 
     private String datasetId;
 
+    private Boolean deepAnalysis;
+
     @JsonProperty("original_document_id")
     @JsonAlias({"originalDocumentId"})
     private String originalDocumentId;
@@ -4,4 +4,6 @@ import com.bjtds.brichat.entity.dataset.DatasetsDocRenameReq;
 
 public interface DatasetsDocService {
     void renameFile(DatasetsDocRenameReq request);
+
+    String getUploadById(String id);
 }
@@ -4,45 +4,76 @@ import com.bjtds.brichat.entity.dataset.DocumentUploadReq;
 import com.bjtds.brichat.entity.dataset.RetrievalModel;
 import com.bjtds.brichat.entity.dify.DatasetDto;
 import com.bjtds.brichat.entity.dify.DifyDatasetResponse;
+import com.bjtds.brichat.entity.dto.PdfConversionResponse;
+import com.bjtds.brichat.entity.dto.PdfTaskDto;
 import com.bjtds.brichat.service.dify.DifyDatasetApiService;
 import com.bjtds.brichat.util.Constants;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import io.github.guoshiqiufeng.dify.dataset.DifyDataset;
+import io.github.guoshiqiufeng.dify.dataset.dto.request.DatasetInfoRequest;
+import io.github.guoshiqiufeng.dify.dataset.dto.response.DatasetInfoResponse;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.core.ParameterizedTypeReference;
 import org.springframework.core.io.ByteArrayResource;
+import org.springframework.data.redis.core.RedisTemplate;
 import org.springframework.http.*;
 import org.springframework.stereotype.Service;
 import org.springframework.util.LinkedMultiValueMap;
 import org.springframework.util.MultiValueMap;
+import org.springframework.util.StringUtils;
 import org.springframework.web.client.RestTemplate;
 import org.springframework.web.multipart.MultipartFile;
 
+import javax.annotation.Resource;
 import java.io.File;
 import java.io.IOException;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
+import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
 @Service
 public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
 
+    private static final Logger logger = LoggerFactory.getLogger(DifyDatasetApiServiceImpl.class);
+    private static final String PDF_TASK_REDIS_KEY = "pdf:conversion:tasks";
+
     @Autowired
     private RestTemplate restTemplate;
 
+    @Autowired
+    @Qualifier("redisTemplate")
+    private RedisTemplate<String, Object> redisTemplate;
+
+    // Open-source DifyDataset component
+    @Resource
+    private DifyDataset difyDatasetService;
+
 //    @Value("${bjtds.difyDatasets.datasetPath}")
 //    @Value("${dify.url}")
 //    private String datasetPat;
     @Value("${dify.url}")
     private String difyUrl;
 
 
     @Value("${dify.dataset.api-key}")
     private String apiKey;
 
+    @Value("${pdf.conversion.service.url}")
+    private String pdfConversionServiceUrl;
+
+    @Value("${pdf.conversion.service.api-url}")
+    private String pdfConversionApiUrl;
+
+    @Value("${pdf.conversion.service.model-name}")
+    private String pdfConversionModelName;
+
+    @Value("${pdf.conversion.service.max-workers}")
+    private Integer pdfConversionMaxWorkers;
+
     @Override
     public ResponseEntity<DatasetDto> createDataset(String name, String description) {
         // 1. Build the request URL
@@ -98,6 +129,23 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
 
     }
 
+    /**
+     * 1. If the file type is PDF:
+     *    1.1 Call the remote service to parse the PDF, convert it to a Markdown file, and get back a callback task id.
+     *    2.1 Push the callback task id into a Redis queue.
+     *        Redis holds an array of task objects; each object carries name, taskId and percent (parse progress).
+     *    3.1 A scheduled job reads each task id's status; once parsing finishes, the Markdown file is uploaded to the
+     *        Dify knowledge base (handled in another class).
+     *
+     * 2. For any other file type, run the normal upload logic.
+     *
+     * @param request
+     * @param file
+     * @return
+     * @throws JsonProcessingException
+     */
     @Override
     public ResponseEntity<Map> createDocumentByFile(DocumentUploadReq request, MultipartFile file) throws JsonProcessingException {
         // Parameter validation
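Step 3.1 of the flow described above lives in another class; the PdfConversionTaskService hunks further down only show the upload side of it. For illustration only, a scheduled scanner over the Redis layout introduced by this commit — task ids in the PDF_TASK_REDIS_KEY + ":list" list, one hash per task holding a "taskInfo" field — might be sketched as follows. The class name, schedule interval, and logging are assumptions, not the committed implementation:

// Illustrative sketch only - not the committed PdfConversionTaskService.
import com.bjtds.brichat.entity.dto.PdfTaskDto;
import com.bjtds.brichat.util.Constants;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;

import java.util.List;

@Slf4j
@Service
public class PdfTaskScanSketch {

    @Autowired
    private RedisTemplate<String, Object> redisTemplate;

    // Poll every 30 seconds (interval is an assumption); @EnableScheduling is enabled by this commit.
    @Scheduled(fixedDelay = 30000)
    public void scanTasks() {
        List<Object> taskIds = redisTemplate.opsForList().range(Constants.PDF_TASK_REDIS_KEY + ":list", 0, -1);
        if (taskIds == null) {
            return;
        }
        for (Object idObj : taskIds) {
            String taskId = String.valueOf(idObj);
            Object info = redisTemplate.opsForHash().get(Constants.PDF_TASK_REDIS_KEY + ":" + taskId, "taskInfo");
            if (info instanceof PdfTaskDto) {
                PdfTaskDto task = (PdfTaskDto) info;
                // The real service would query the conversion service for the task status here,
                // update percent, and upload the finished Markdown to the Dify dataset.
                log.debug("pending PDF task {} at {}%", taskId, task.getPercent());
            }
        }
    }
}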
@@ -109,6 +157,81 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
             throw new IllegalArgumentException("上传文件不能为空");
         }
 
+        // Get the file name and extension
+        String originalFilename = file.getOriginalFilename();
+        if (originalFilename == null) {
+            throw new IllegalArgumentException("文件名不能为空");
+        }
+
+        String fileExtension = getFileExtension(originalFilename).toLowerCase();
+        logger.info("上传文件: {}, 扩展名: {}", originalFilename, fileExtension);
+
+        // 1. Deep analysis requested: extract the text embedded in images
+        if (request.getDeepAnalysis()) {
+            return handlePdfFile(request, file);
+        } else {
+            // 2. Otherwise run the normal upload logic
+            return handleNormalFile(request, file);
+        }
+    }
+
+    /**
+     * Handle a PDF file.
+     * 1.1 Call the remote service to parse the PDF, convert it to Markdown, and get back a callback task id.
+     * 2.1 Push the callback task id into the Redis queue.
+     */
+    private ResponseEntity<Map> handlePdfFile(DocumentUploadReq request, MultipartFile file) throws JsonProcessingException {
+        try {
+            // 1.1 Call the PDF conversion service
+            String taskId = callPdfConversionService(file);
+
+            // 2.1 Store the task info in Redis
+//            PdfTaskDto pdfTask = new PdfTaskDto(
+//                    file.getOriginalFilename(),
+//                    taskId,
+//                    0.0, // initial progress 0%
+//                    request.getDatasetId()
+//            );
+            DatasetInfoRequest datasetInfoRequest = new DatasetInfoRequest();
+            datasetInfoRequest.setDatasetId(request.getDatasetId());
+
+            DatasetInfoResponse datasetInfo = difyDatasetService.info(datasetInfoRequest);
+            String datasetName = datasetInfo.getName();
+
+            request.setDeepAnalysis(false);
+
+            PdfTaskDto pdfTask = PdfTaskDto.builder()
+                    .name(file.getOriginalFilename())
+                    .taskId(taskId)
+                    .percent(0.0)
+                    .datasetId(request.getDatasetId())
+                    .datasetName(datasetName)
+                    .uploadReq(request)
+                    .build();
+
+            storePdfTaskToRedis(pdfTask);
+
+            logger.info("PDF转换任务已提交,任务ID: {}, 文件名: {}", taskId, file.getOriginalFilename());
+
+            // Return a success response
+            Map<String, Object> response = new HashMap<>();
+            response.put("success", true);
+            response.put("message", "PDF文件已提交转换,任务ID: " + taskId);
+            response.put("task_id", taskId);
+            response.put("file_name", file.getOriginalFilename());
+
+            return ResponseEntity.ok(response);
+
+        } catch (Exception e) {
+            logger.error("PDF文件处理失败: {}", e.getMessage(), e);
+            throw new RuntimeException("PDF文件处理失败: " + e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Handle non-PDF files with the normal upload logic.
+     */
+    private ResponseEntity<Map> handleNormalFile(DocumentUploadReq request, MultipartFile file) throws JsonProcessingException {
         String url = difyUrl + Constants.DATABASE_API + "/{dataset_id}/document/create-by-file";
 
         HttpHeaders headers = new HttpHeaders();
@ -125,7 +248,6 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
// 创建临时请求对象,不包含datasetId(datasetId用于URL路径参数)
|
// 创建临时请求对象,不包含datasetId(datasetId用于URL路径参数)
|
||||||
DocumentUploadReq dataRequest = new DocumentUploadReq();
|
DocumentUploadReq dataRequest = new DocumentUploadReq();
|
||||||
dataRequest.setIndexingTechnique(request.getIndexingTechnique());
|
dataRequest.setIndexingTechnique(request.getIndexingTechnique());
|
||||||
|
|
||||||
dataRequest.setProcessRule(request.getProcessRule());
|
dataRequest.setProcessRule(request.getProcessRule());
|
||||||
|
|
||||||
//设置检索模式(默认混合检索)
|
//设置检索模式(默认混合检索)
|
||||||
|
@@ -178,5 +300,92 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
         }
     }
 
+    /**
+     * Call the PDF conversion service.
+     */
+    private String callPdfConversionService(MultipartFile file) {
+        String url = pdfConversionServiceUrl + "/v1/pdf2md";
+
+        HttpHeaders headers = new HttpHeaders();
+        headers.setContentType(MediaType.MULTIPART_FORM_DATA);
+
+        MultiValueMap<String, Object> body = new LinkedMultiValueMap<>();
+
+        // Attach the PDF file
+        try {
+            body.add("pdf_file", new ByteArrayResource(file.getBytes()) {
+                @Override
+                public String getFilename() {
+                    return file.getOriginalFilename();
+                }
+            });
+        } catch (IOException e) {
+            throw new RuntimeException("PDF文件读取失败: " + e.getMessage(), e);
+        }
+
+        // Add optional parameters
+        body.add("api_url", pdfConversionApiUrl);
+        body.add("model_name", pdfConversionModelName);
+        body.add("max_workers", pdfConversionMaxWorkers.toString());
+
+        HttpEntity<MultiValueMap<String, Object>> requestEntity = new HttpEntity<>(body, headers);
+
+        try {
+            ResponseEntity<PdfConversionResponse> response = restTemplate.exchange(
+                    url,
+                    HttpMethod.POST,
+                    requestEntity,
+                    PdfConversionResponse.class
+            );
+
+            if (response.getBody() == null || !StringUtils.hasText(response.getBody().getTaskId())) {
+                throw new RuntimeException("PDF转换服务返回的任务ID为空");
+            }
+
+            return response.getBody().getTaskId();
+
+        } catch (Exception e) {
+            logger.error("调用PDF转换服务失败: {}", e.getMessage(), e);
+            throw new RuntimeException("调用PDF转换服务失败: " + e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Store the PDF task info in Redis.
+     */
+    private void storePdfTaskToRedis(PdfTaskDto pdfTask) {
+        try {
+            // Store as a hash: the key is the task ID, the value is the task info
+            String hashKey = PDF_TASK_REDIS_KEY + ":" + pdfTask.getTaskId();
+            redisTemplate.opsForHash().put(hashKey, "taskInfo", pdfTask);
+
+            // Expire after 24 hours
+            redisTemplate.expire(hashKey, 24, TimeUnit.HOURS);
+
+            // Also push the task ID onto the task list so the scheduled job can scan it
+            redisTemplate.opsForList().rightPush(PDF_TASK_REDIS_KEY + ":list", pdfTask.getTaskId());
+
+            logger.info("PDF任务信息已存储到Redis: {}", pdfTask);
+
+        } catch (Exception e) {
+            logger.error("存储PDF任务信息到Redis失败: {}", e.getMessage(), e);
+            throw new RuntimeException("存储PDF任务信息到Redis失败: " + e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Get the file extension.
+     */
+    private String getFileExtension(String filename) {
+        if (filename == null || filename.isEmpty()) {
+            return "";
+        }
+        int lastDotIndex = filename.lastIndexOf('.');
+        if (lastDotIndex == -1 || lastDotIndex == filename.length() - 1) {
+            return "";
+        }
+        return filename.substring(lastDotIndex + 1);
+    }
+
 }
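callPdfConversionService above posts the PDF as multipart form data to the conversion service's /v1/pdf2md endpoint and deserialises the reply into PdfConversionResponse, which is imported but not shown in this diff. Only getTaskId() is confirmed by the code; a minimal sketch consistent with that usage could be the following, where the task_id wire name is an assumption:

// Hypothetical sketch of PdfConversionResponse; only getTaskId() is confirmed by this diff.
package com.bjtds.brichat.entity.dto;

import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;

@Data
public class PdfConversionResponse {
    // Assumed JSON field name; the conversion service's actual response schema is not part of this commit.
    @JsonProperty("task_id")
    private String taskId;
}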
@@ -1,5 +1,6 @@
 package com.bjtds.brichat.service.impl;
 
+import cn.hutool.json.JSONUtil;
 import com.bjtds.brichat.entity.dataset.DatasetsDocRenameReq;
 import com.bjtds.brichat.mapper.postgresql.DifyDatasetsDocMapper;
 import com.bjtds.brichat.service.DatasetsDocService;
@@ -25,4 +26,11 @@ public class DatasetsDocServiceImpl implements DatasetsDocService {
             throw new RuntimeException("文档未找到或更新失败");
         }
     }
+
+    @Override
+    public String getUploadById(String id) {
+        String dataSourceById = difyDatasetsDocMapper.getDataSourceById(id);
+        String uploadId = JSONUtil.parseObj(dataSourceById).getStr("upload_file_id");
+        return uploadId;
+    }
 }
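getUploadById assumes the mapper returns a JSON string containing an upload_file_id key (Dify's document data-source info); the column layout itself is not part of this diff. A small, self-contained illustration of the Hutool parsing step, using a made-up payload:

import cn.hutool.json.JSONUtil;

public class UploadIdParseDemo {
    public static void main(String[] args) {
        // Hypothetical payload; the real JSON comes from difyDatasetsDocMapper.getDataSourceById(id).
        String dataSource = "{\"upload_file_id\":\"example-upload-file-id\"}";
        String uploadFileId = JSONUtil.parseObj(dataSource).getStr("upload_file_id");
        System.out.println(uploadFileId); // prints: example-upload-file-id
    }
}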
@@ -10,14 +10,11 @@ import com.bjtds.brichat.entity.dify.DifyUploadFile;
 import com.bjtds.brichat.entity.dto.FilePreviewDto;
 import com.bjtds.brichat.mapper.postgresql.DifyUploadFileMapper;
 import com.bjtds.brichat.service.ChatMessageService;
-import com.bjtds.brichat.service.DocumentService;
+import com.bjtds.brichat.service.DatasetsDocService;
 import com.bjtds.brichat.service.FileService;
 import com.bjtds.brichat.util.Constants;
 import com.bjtds.brichat.util.FileUploadUtil;
 import com.bjtds.brichat.util.PdfUtils;
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import io.github.guoshiqiufeng.dify.dataset.DifyDataset;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.lang3.StringUtils;
@@ -36,7 +33,6 @@ import java.io.*;
 import java.nio.file.Files;
 import java.nio.file.StandardCopyOption;
 import java.util.*;
-import java.util.stream.Collectors;
 
 @Slf4j
 @Service
@@ -73,7 +69,7 @@ public class FileServiceImpl implements FileService {
     private DifyUploadFileMapper difyUploadFileMapper;
 
     @Resource
-    private DocumentService documentService;
+    private DatasetsDocService datasetsDocService;
 
 //    @Override
 //    public String fileUpload(MultipartFile multiFile, String filePath) {
@@ -384,7 +380,7 @@ public class FileServiceImpl implements FileService {
         String sysMessageId = (String)traceJson.get("messageId");
         List<TraceFile> traceFiles = new ArrayList<>();
         for( String key : tracePdf.keySet()){
-            String uploadId = documentService.getUploadById(key);
+            String uploadId = datasetsDocService.getUploadById(key);
             // Get the file's storage path
             // /var/www/dify/storage + "/" + upload_files/1f93db68-fb8c-4d58-bff2-3e91d72757cf/9b12ca0d-dd5a-4d4e-9d3f-946227ae4e84.xlsx
             DifyUploadFile uploadPath = difyUploadFileMapper.getFileById(uploadId);
@@ -261,7 +261,14 @@ public class PdfConversionTaskService {
             }
 
             // 2. Create a MultipartFile
-            MultipartFile multipartFile = createMultipartFileFromPath(path);
+            MultipartFile multipartFile = createMultipartFileFromPath(path, taskInfo.getName());
+            // Rename the file
+//            multipartFile = new SimpleMultipartFile(
+//                    multipartFile.getName(),
+//                    taskInfo.getName() + ".md",
+//                    multipartFile.getContentType(),
+//                    multipartFile.getBytes()
+//            );
 
 
             // 3. Call the upload API
@@ -278,9 +285,8 @@ public class PdfConversionTaskService {
     /**
      * Create a MultipartFile from a file path (JDK 1.8-compatible).
      */
-    private MultipartFile createMultipartFileFromPath(Path filePath) throws IOException {
+    private MultipartFile createMultipartFileFromPath(Path filePath, String fileName) throws IOException {
         File file = filePath.toFile();
-        String fileName = file.getName();
         String originalFileName = fileName.endsWith(".md") ? fileName : fileName + ".md";
 
         // JDK 1.8-compatible file reading
@@ -16,4 +16,7 @@ public class Constants {
     public static final String BEARER = "Bearer ";
 
     public static final String DATABASE_API = "/v1/datasets";
+
+
+    public static final String PDF_TASK_REDIS_KEY = "pdf:conversion:tasks";
 }
@@ -48,4 +48,13 @@ dify:
   email: bjtds@bjtds.com # Replace with the actual Dify service email; optional if the server-related APIs are not used
   password: 123456Aa # Replace with the actual Dify service password; optional if the server-related APIs are not used
   dataset:
     api-key: ${dify-dataset-api-key:dataset-zVa4uJBUem96P19o8iBtyihQ} # Replace with the actual knowledge-base api-key; optional if the knowledge base is not used
+
+# PDF conversion service configuration
+pdf:
+  conversion:
+    service:
+      url: ${pdf-conversion-url:http://192.168.1.211:12201} # PDF conversion service address
+      api-url: ${pdf-conversion-api-url:http://192.168.1.211:1050/v1/chat/completions} # QwenVL API endpoint
+      model-name: ${pdf-conversion-model:qwenvl} # Model name to use
+      max-workers: ${pdf-conversion-max-workers:10} # Number of concurrent worker threads
@@ -48,4 +48,13 @@ dify:
   email: bjtds@bjtds.com # Replace with the actual Dify service email; optional if the server-related APIs are not used
   password: 123456Aa # Replace with the actual Dify service password; optional if the server-related APIs are not used
   dataset:
     api-key: ${dify-dataset-api-key:dataset-0Hij9IwoWYbJe1vvwVh8y7DS} # Replace with the actual knowledge-base api-key; optional if the knowledge base is not used
+
+# PDF conversion service configuration
+pdf:
+  conversion:
+    service:
+      url: ${pdf-conversion-url:http://192.168.8.253:12201} # PDF conversion service address
+      api-url: ${pdf-conversion-api-url:http://192.168.8.253:1050/v1/chat/completions} # QwenVL API endpoint
+      model-name: ${pdf-conversion-model:qwenvl} # Model name to use
+      max-workers: ${pdf-conversion-max-workers:10} # Number of concurrent worker threads
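Both profile files add the same pdf.conversion.service.* block (only the host differs), and DifyDatasetApiServiceImpl binds the four values individually with @Value. An equivalent, type-safe alternative — not what the commit does — would be a @ConfigurationProperties binding; the class below is a sketch with an assumed name and package, relying on Spring Boot's relaxed binding to map api-url, model-name, and max-workers:

// Alternative sketch (not part of the commit): type-safe binding for pdf.conversion.service.*
package com.bjtds.brichat.config;

import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;

@Data
@Component
@ConfigurationProperties(prefix = "pdf.conversion.service")
public class PdfConversionProperties {
    private String url;         // PDF conversion service address
    private String apiUrl;      // QwenVL API endpoint (maps api-url)
    private String modelName;   // model name (maps model-name)
    private Integer maxWorkers; // number of concurrent worker threads (maps max-workers)
}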
@@ -18,13 +18,13 @@ spring:
   # Allowed values: wuhan, beijing
   # Can be overridden at deployment time via the SPRING_PROFILES_ACTIVE environment variable
   profiles:
-    active: beijing
+    active: wuhan
 
   # File upload configuration
   servlet:
     multipart:
-      max-request-size: 10MB
-      max-file-size: 10MB
+      max-request-size: 100MB
+      max-file-size: 100MB
 
   # Data source configuration has been moved to the environment-specific config files
   # application-wuhan.yml and application-beijing.yml