feat:文件上传时增加source_url文件溯源预览路径
This commit is contained in:
parent
a023719e5c
commit
5faa1aa59b
|
@ -0,0 +1,16 @@
|
||||||
|
package com.bjtds.brichat.entity.dify;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
import lombok.Data;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
@Data
|
||||||
|
public class DifyGetMetadatasRes {
|
||||||
|
|
||||||
|
@JsonProperty("doc_metadata")
|
||||||
|
private List<DifyMetadata> docMetadatas;
|
||||||
|
|
||||||
|
@JsonProperty("built_in_field_enabled")
|
||||||
|
private Boolean builtInFieldEnabled;
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,14 @@
|
||||||
|
package com.bjtds.brichat.entity.dify;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
import lombok.Data;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
@Data
|
||||||
|
public class DifyMatedataAnnoReq {
|
||||||
|
|
||||||
|
@JsonProperty("document_id")
|
||||||
|
private String documentId;
|
||||||
|
@JsonProperty("metadata_list")
|
||||||
|
private List<DifyMetadata> metadataList ;
|
||||||
|
}
|
|
@ -0,0 +1,22 @@
|
||||||
|
package com.bjtds.brichat.entity.dify;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
import javax.websocket.OnOpen;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@Builder
|
||||||
|
@AllArgsConstructor
|
||||||
|
@NoArgsConstructor
|
||||||
|
public class DifyMetadata {
|
||||||
|
private String id;
|
||||||
|
private String type;
|
||||||
|
private String name;
|
||||||
|
private String value;
|
||||||
|
@JsonProperty("use_count")
|
||||||
|
private Integer useCount;
|
||||||
|
}
|
|
@ -1,8 +1,7 @@
|
||||||
package com.bjtds.brichat.service.dify;
|
package com.bjtds.brichat.service.dify;
|
||||||
|
|
||||||
import com.bjtds.brichat.entity.dataset.DocumentUploadReq;
|
import com.bjtds.brichat.entity.dataset.DocumentUploadReq;
|
||||||
import com.bjtds.brichat.entity.dify.DatasetDto;
|
import com.bjtds.brichat.entity.dify.*;
|
||||||
import com.bjtds.brichat.entity.dify.DifyDatasetResponse;
|
|
||||||
import org.springframework.http.ResponseEntity;
|
import org.springframework.http.ResponseEntity;
|
||||||
import org.springframework.web.multipart.MultipartFile;
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
|
|
||||||
|
@ -24,4 +23,28 @@ public interface DifyDatasetApiService{
|
||||||
ResponseEntity<Map> createDocumentByFile(
|
ResponseEntity<Map> createDocumentByFile(
|
||||||
DocumentUploadReq request,
|
DocumentUploadReq request,
|
||||||
MultipartFile file) throws IOException;
|
MultipartFile file) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 为知识库添加元数据
|
||||||
|
* @param datasetId
|
||||||
|
* @param metadata
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
ResponseEntity<DifyMetadata> addMetadata(String datasetId, DifyMetadata metadata);
|
||||||
|
|
||||||
|
/***
|
||||||
|
* 获取知识库元数据列表
|
||||||
|
* @param datasetId
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
ResponseEntity<DifyGetMetadatasRes> getMetadatas(String datasetId);
|
||||||
|
|
||||||
|
|
||||||
|
/***
|
||||||
|
* 为文档标注元数据
|
||||||
|
* @param datasetId
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
ResponseEntity<Map> setMetadataForDoc(String datasetId, DifyMatedataAnnoReq metadataAnnoReq);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
package com.bjtds.brichat.service.dify.impl;
|
package com.bjtds.brichat.service.dify.impl;
|
||||||
|
|
||||||
|
import cn.hutool.json.JSONUtil;
|
||||||
import com.bjtds.brichat.entity.dataset.DocumentUploadReq;
|
import com.bjtds.brichat.entity.dataset.DocumentUploadReq;
|
||||||
import com.bjtds.brichat.entity.dataset.RetrievalModel;
|
import com.bjtds.brichat.entity.dataset.RetrievalModel;
|
||||||
import com.bjtds.brichat.entity.dify.DatasetDto;
|
import com.bjtds.brichat.entity.dify.*;
|
||||||
import com.bjtds.brichat.entity.dify.DifyDatasetResponse;
|
|
||||||
import com.bjtds.brichat.entity.dto.PdfConversionResponse;
|
import com.bjtds.brichat.entity.dto.PdfConversionResponse;
|
||||||
import com.bjtds.brichat.entity.dto.PdfTaskDto;
|
import com.bjtds.brichat.entity.dto.PdfTaskDto;
|
||||||
import com.bjtds.brichat.mapper.postgresql.DifyDatasetsMapper;
|
import com.bjtds.brichat.mapper.postgresql.DifyDatasetsMapper;
|
||||||
|
@ -14,11 +14,17 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import io.github.guoshiqiufeng.dify.dataset.DifyDataset;
|
import io.github.guoshiqiufeng.dify.dataset.DifyDataset;
|
||||||
import io.github.guoshiqiufeng.dify.dataset.dto.request.DatasetInfoRequest;
|
import io.github.guoshiqiufeng.dify.dataset.dto.request.DatasetInfoRequest;
|
||||||
import io.github.guoshiqiufeng.dify.dataset.dto.response.DatasetInfoResponse;
|
import io.github.guoshiqiufeng.dify.dataset.dto.response.DatasetInfoResponse;
|
||||||
|
import io.github.guoshiqiufeng.dify.dataset.dto.response.UploadFileInfoResponse;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.poi.hwpf.HWPFDocument;
|
import org.apache.poi.hwpf.HWPFDocument;
|
||||||
import org.apache.poi.hwpf.extractor.WordExtractor;
|
import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||||
|
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
||||||
|
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||||
|
import org.apache.poi.hwpf.usermodel.Range;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFRun;
|
import org.apache.poi.xwpf.usermodel.XWPFRun;
|
||||||
|
import org.docx4j.openpackaging.exceptions.Docx4JException;
|
||||||
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
|
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -37,9 +43,7 @@ import org.springframework.web.client.RestTemplate;
|
||||||
import org.springframework.web.multipart.MultipartFile;
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
|
|
||||||
import javax.annotation.Resource;
|
import javax.annotation.Resource;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.*;
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
@ -85,6 +89,104 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
@Resource
|
@Resource
|
||||||
private DifyDatasetsMapper difyDatasetsMapper;
|
private DifyDatasetsMapper difyDatasetsMapper;
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ResponseEntity<DifyMetadata> addMetadata(String datasetId, DifyMetadata metadata) {
|
||||||
|
// 1. 设置请求URL
|
||||||
|
String url = difyUrl+ Constants.DATABASE_API + "/" + datasetId + "/metadata";
|
||||||
|
|
||||||
|
// 2. 设置请求头
|
||||||
|
HttpHeaders headers = new HttpHeaders();
|
||||||
|
headers.set("Authorization", Constants.BEARER +apiKey); // Bearer认证
|
||||||
|
headers.setContentType(MediaType.APPLICATION_JSON); // 设置JSON类型
|
||||||
|
|
||||||
|
// 3. 构建请求体(包含name和description的JSON对象)
|
||||||
|
Map<String, String> requestBody = new HashMap<>();
|
||||||
|
requestBody.put("type", metadata.getType());
|
||||||
|
requestBody.put("name", metadata.getName());
|
||||||
|
// 4. 创建包含请求体和头的HttpEntity
|
||||||
|
HttpEntity<Map<String, String>> requestEntity = new HttpEntity<>(requestBody, headers);
|
||||||
|
|
||||||
|
// 5. 发送POST请求
|
||||||
|
return restTemplate.exchange(
|
||||||
|
url,
|
||||||
|
HttpMethod.POST,
|
||||||
|
requestEntity,
|
||||||
|
new ParameterizedTypeReference<DifyMetadata>() {} // 解决泛型类型擦除问题
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ResponseEntity<DifyGetMetadatasRes> getMetadatas(String datasetId) {
|
||||||
|
|
||||||
|
// 1. 设置请求URL
|
||||||
|
String url = difyUrl+ Constants.DATABASE_API + "/" + datasetId + "/metadata";
|
||||||
|
|
||||||
|
// 2. 设置请求头
|
||||||
|
HttpHeaders headers = new HttpHeaders();
|
||||||
|
headers.set("Authorization", Constants.BEARER +apiKey); // Bearer认证
|
||||||
|
headers.setContentType(MediaType.APPLICATION_JSON); // 设置JSON类型
|
||||||
|
HttpEntity<?> requestEntity = new HttpEntity<>(headers);
|
||||||
|
return restTemplate.exchange(
|
||||||
|
url,
|
||||||
|
HttpMethod.GET,
|
||||||
|
requestEntity,
|
||||||
|
new ParameterizedTypeReference<DifyGetMetadatasRes>() {} // 解决泛型类型擦除问题
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ResponseEntity<Map> setMetadataForDoc(String datasetId, DifyMatedataAnnoReq metadataAnnoReq) {
|
||||||
|
|
||||||
|
// 1. 设置请求URL
|
||||||
|
String url = difyUrl+ Constants.DATABASE_API + "/" + datasetId + "/documents/metadata";
|
||||||
|
// 2. 设置请求头
|
||||||
|
HttpHeaders headers = new HttpHeaders();
|
||||||
|
headers.set("Authorization", Constants.BEARER +apiKey); // Bearer认证
|
||||||
|
headers.setContentType(MediaType.APPLICATION_JSON); // 设置JSON类型
|
||||||
|
|
||||||
|
// 3. 构建请求体
|
||||||
|
Map<String, Object> requestBody = new HashMap<>();
|
||||||
|
|
||||||
|
// 构建operation_data数组 - 修复:应该是数组格式
|
||||||
|
List<DifyMatedataAnnoReq> operationDataList = new ArrayList<>();
|
||||||
|
operationDataList.add(metadataAnnoReq);
|
||||||
|
requestBody.put("operation_data", operationDataList);
|
||||||
|
|
||||||
|
// 4. 创建包含请求体和头的HttpEntity
|
||||||
|
HttpEntity<Map<String, Object>> requestEntity = new HttpEntity<>(requestBody, headers);
|
||||||
|
|
||||||
|
// 5. 发送POST请求 - 修复:使用String类型接收响应,然后手动处理
|
||||||
|
try {
|
||||||
|
ResponseEntity<String> response = restTemplate.exchange(
|
||||||
|
url,
|
||||||
|
HttpMethod.POST,
|
||||||
|
requestEntity,
|
||||||
|
String.class
|
||||||
|
);
|
||||||
|
|
||||||
|
// 手动构建返回的Map
|
||||||
|
Map<String, Object> resultMap = new HashMap<>();
|
||||||
|
resultMap.put("success", true);
|
||||||
|
resultMap.put("message", "元数据设置成功");
|
||||||
|
resultMap.put("response", response.getBody());
|
||||||
|
|
||||||
|
return ResponseEntity.status(response.getStatusCode()).body(resultMap);
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("设置文档元数据失败: {}", e.getMessage(), e);
|
||||||
|
Map<String, Object> errorMap = new HashMap<>();
|
||||||
|
errorMap.put("success", false);
|
||||||
|
errorMap.put("message", "元数据设置失败: " + e.getMessage());
|
||||||
|
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMap);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ResponseEntity<DatasetDto> createDataset(String name, String description) {
|
public ResponseEntity<DatasetDto> createDataset(String name, String description) {
|
||||||
// 1. 设置请求URL
|
// 1. 设置请求URL
|
||||||
|
@ -190,6 +292,9 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
// 2. 执行正常文件上传逻辑
|
// 2. 执行正常文件上传逻辑
|
||||||
return handleNormalFile(request, file);
|
return handleNormalFile(request, file);
|
||||||
}
|
}
|
||||||
|
//3 为文件标注预览路径的元数据
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -306,18 +411,62 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
|
|
||||||
// 发送请求
|
// 发送请求
|
||||||
try {
|
try {
|
||||||
return restTemplate.exchange(
|
ResponseEntity<Map> exchange = restTemplate.exchange(
|
||||||
url,
|
url,
|
||||||
HttpMethod.POST,
|
HttpMethod.POST,
|
||||||
requestEntity,
|
requestEntity,
|
||||||
Map.class,
|
Map.class,
|
||||||
uriVariables
|
uriVariables
|
||||||
);
|
);
|
||||||
|
|
||||||
|
//3. 为文档添加元数据
|
||||||
|
Map<String,String> document = ( Map<String,String> ) exchange.getBody().get("document");
|
||||||
|
String documentId = document.get("id");
|
||||||
|
|
||||||
|
UploadFileInfoResponse uploadFileInfoResponse = difyDatasetService.uploadFileInfo(request.getDatasetId(), documentId);
|
||||||
|
String urls = uploadFileInfoResponse.getUrl();
|
||||||
|
String fullUrl = difyUrl + urls;
|
||||||
|
setSourceUrlMatedata(request.getDatasetId(),documentId,fullUrl);
|
||||||
|
|
||||||
|
return exchange;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new RuntimeException("文档上传失败: " + e.getMessage(), e);
|
throw new RuntimeException("文档上传失败: " + e.getMessage(), e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void setSourceUrlMatedata(String datasetId,String documentId,String sourceUrl){
|
||||||
|
try {
|
||||||
|
//1. 查询知识库是否存在source元数据
|
||||||
|
ResponseEntity<DifyGetMetadatasRes> res = getMetadatas(datasetId);
|
||||||
|
|
||||||
|
List<DifyMetadata> docMetadatas = res.getBody().getDocMetadatas();
|
||||||
|
DifyMetadata metadata = null;
|
||||||
|
DifyMatedataAnnoReq metadataAnnoReq = new DifyMatedataAnnoReq();
|
||||||
|
metadataAnnoReq.setDocumentId(documentId);
|
||||||
|
if (docMetadatas == null || docMetadatas.isEmpty() ||docMetadatas.stream().noneMatch(m -> "source_url".equals(m.getName()))) {
|
||||||
|
metadata = addMetadata(datasetId, DifyMetadata.builder().type("string").name("source_url").build()).getBody();
|
||||||
|
}
|
||||||
|
if (metadata == null) {
|
||||||
|
metadata = docMetadatas.stream().map(m -> {
|
||||||
|
if ("source_url".equals(m.getName())) {
|
||||||
|
m.setValue(sourceUrl);
|
||||||
|
}
|
||||||
|
return m;
|
||||||
|
}).findFirst().orElse(null);
|
||||||
|
}
|
||||||
|
metadata.setValue(sourceUrl);
|
||||||
|
metadataAnnoReq.setMetadataList(Collections.singletonList(metadata));
|
||||||
|
ResponseEntity<Map> result = setMetadataForDoc(datasetId, metadataAnnoReq);
|
||||||
|
logger.info("为文档 {} 设置source_url元数据成功", documentId);
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
// 元数据设置失败不应该影响文档上传的主流程
|
||||||
|
logger.warn("为文档 {} 设置source_url元数据失败,但文档上传成功: {}", documentId, e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 调用PDF转换服务
|
* 调用PDF转换服务
|
||||||
*/
|
*/
|
||||||
|
@ -414,8 +563,8 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
|
|
||||||
// 读取转换后的文件内容 - Java 8兼容
|
// 读取转换后的文件内容 - Java 8兼容
|
||||||
byte[] docxBytes;
|
byte[] docxBytes;
|
||||||
try (java.io.FileInputStream fis = new java.io.FileInputStream(tempFile)) {
|
try (FileInputStream fis = new FileInputStream(tempFile)) {
|
||||||
docxBytes = org.apache.commons.io.IOUtils.toByteArray(fis);
|
docxBytes = IOUtils.toByteArray(fis);
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info("docx4j转换成功: {} -> {} (大小: {} -> {} bytes)",
|
logger.info("docx4j转换成功: {} -> {} (大小: {} -> {} bytes)",
|
||||||
|
@ -440,7 +589,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (org.docx4j.openpackaging.exceptions.Docx4JException e) {
|
} catch (Docx4JException e) {
|
||||||
logger.warn("docx4j无法识别文件格式,可能是较旧的DOC格式: {}", e.getMessage());
|
logger.warn("docx4j无法识别文件格式,可能是较旧的DOC格式: {}", e.getMessage());
|
||||||
return handleOlderDocFormat(docFile, originalFilename);
|
return handleOlderDocFormat(docFile, originalFilename);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -520,7 +669,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// 读取DOC文档
|
// 读取DOC文档
|
||||||
try (java.io.InputStream inputStream = docFile.getInputStream()) {
|
try (InputStream inputStream = docFile.getInputStream()) {
|
||||||
docDocument = new HWPFDocument(inputStream);
|
docDocument = new HWPFDocument(inputStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -561,7 +710,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
private void convertDocContentToDocx(HWPFDocument docDocument, XWPFDocument docxDocument) {
|
private void convertDocContentToDocx(HWPFDocument docDocument, XWPFDocument docxDocument) {
|
||||||
try {
|
try {
|
||||||
// 获取文档范围
|
// 获取文档范围
|
||||||
org.apache.poi.hwpf.usermodel.Range documentRange = docDocument.getRange();
|
Range documentRange = docDocument.getRange();
|
||||||
|
|
||||||
// 按段落处理
|
// 按段落处理
|
||||||
int numParagraphs = documentRange.numParagraphs();
|
int numParagraphs = documentRange.numParagraphs();
|
||||||
|
@ -569,7 +718,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
|
|
||||||
for (int i = 0; i < numParagraphs; i++) {
|
for (int i = 0; i < numParagraphs; i++) {
|
||||||
try {
|
try {
|
||||||
org.apache.poi.hwpf.usermodel.Paragraph hwpfParagraph = documentRange.getParagraph(i);
|
Paragraph hwpfParagraph = documentRange.getParagraph(i);
|
||||||
String paragraphText = hwpfParagraph.text();
|
String paragraphText = hwpfParagraph.text();
|
||||||
|
|
||||||
// 跳过空段落和只包含控制字符的段落
|
// 跳过空段落和只包含控制字符的段落
|
||||||
|
@ -585,7 +734,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
int numCharacterRuns = hwpfParagraph.numCharacterRuns();
|
int numCharacterRuns = hwpfParagraph.numCharacterRuns();
|
||||||
for (int j = 0; j < numCharacterRuns; j++) {
|
for (int j = 0; j < numCharacterRuns; j++) {
|
||||||
try {
|
try {
|
||||||
org.apache.poi.hwpf.usermodel.CharacterRun characterRun = hwpfParagraph.getCharacterRun(j);
|
CharacterRun characterRun = hwpfParagraph.getCharacterRun(j);
|
||||||
String runText = characterRun.text();
|
String runText = characterRun.text();
|
||||||
|
|
||||||
if (runText != null && !runText.trim().isEmpty()) {
|
if (runText != null && !runText.trim().isEmpty()) {
|
||||||
|
@ -744,7 +893,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
private String extractTextFromDocSimple(MultipartFile docFile) {
|
private String extractTextFromDocSimple(MultipartFile docFile) {
|
||||||
try {
|
try {
|
||||||
// 尝试使用HWPFDocument提取文本(最基本的方式)
|
// 尝试使用HWPFDocument提取文本(最基本的方式)
|
||||||
try (java.io.InputStream inputStream = docFile.getInputStream()) {
|
try (InputStream inputStream = docFile.getInputStream()) {
|
||||||
HWPFDocument docDocument = new HWPFDocument(inputStream);
|
HWPFDocument docDocument = new HWPFDocument(inputStream);
|
||||||
WordExtractor extractor = new WordExtractor(docDocument);
|
WordExtractor extractor = new WordExtractor(docDocument);
|
||||||
String text = extractor.getText();
|
String text = extractor.getText();
|
||||||
|
@ -765,7 +914,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
private byte[] convertDocToDocxUsingPOI(MultipartFile docFile) throws Exception {
|
private byte[] convertDocToDocxUsingPOI(MultipartFile docFile) throws Exception {
|
||||||
ByteArrayOutputStream baos = null;
|
ByteArrayOutputStream baos = null;
|
||||||
XWPFDocument docxDocument = null;
|
XWPFDocument docxDocument = null;
|
||||||
java.io.InputStream docInputStream = null;
|
InputStream docInputStream = null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// 创建输入流
|
// 创建输入流
|
||||||
|
@ -802,7 +951,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
/**
|
/**
|
||||||
* 从DOC文件中提取文本
|
* 从DOC文件中提取文本
|
||||||
*/
|
*/
|
||||||
private String extractTextFromDoc(java.io.InputStream docInputStream) throws Exception {
|
private String extractTextFromDoc(InputStream docInputStream) throws Exception {
|
||||||
HWPFDocument docDocument = null;
|
HWPFDocument docDocument = null;
|
||||||
WordExtractor extractor = null;
|
WordExtractor extractor = null;
|
||||||
|
|
||||||
|
@ -811,8 +960,8 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
if (!docInputStream.markSupported()) {
|
if (!docInputStream.markSupported()) {
|
||||||
// 如果不支持mark,将流转换为ByteArrayInputStream
|
// 如果不支持mark,将流转换为ByteArrayInputStream
|
||||||
// Java 8兼容:使用Apache Commons IO读取所有字节
|
// Java 8兼容:使用Apache Commons IO读取所有字节
|
||||||
byte[] bytes = org.apache.commons.io.IOUtils.toByteArray(docInputStream);
|
byte[] bytes = IOUtils.toByteArray(docInputStream);
|
||||||
docInputStream = new java.io.ByteArrayInputStream(bytes);
|
docInputStream = new ByteArrayInputStream(bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
docInputStream.mark(Integer.MAX_VALUE);
|
docInputStream.mark(Integer.MAX_VALUE);
|
||||||
|
@ -842,11 +991,11 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
/**
|
/**
|
||||||
* 备用的文本提取方法
|
* 备用的文本提取方法
|
||||||
*/
|
*/
|
||||||
private String extractTextWithAlternativeMethod(java.io.InputStream docInputStream) throws Exception {
|
private String extractTextWithAlternativeMethod(InputStream docInputStream) throws Exception {
|
||||||
try {
|
try {
|
||||||
// 尝试使用更宽松的方式读取
|
// 尝试使用更宽松的方式读取
|
||||||
// Java 8兼容:使用Apache Commons IO读取所有字节
|
// Java 8兼容:使用Apache Commons IO读取所有字节
|
||||||
byte[] docBytes = org.apache.commons.io.IOUtils.toByteArray(docInputStream);
|
byte[] docBytes = IOUtils.toByteArray(docInputStream);
|
||||||
|
|
||||||
// 简单的文本提取 - 寻找可能的文本内容
|
// 简单的文本提取 - 寻找可能的文本内容
|
||||||
String content = new String(docBytes, "UTF-8");
|
String content = new String(docBytes, "UTF-8");
|
||||||
|
@ -905,7 +1054,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
/**
|
/**
|
||||||
* 安全关闭资源
|
* 安全关闭资源
|
||||||
*/
|
*/
|
||||||
private void closeResources(java.io.InputStream inputStream, XWPFDocument docxDocument, ByteArrayOutputStream outputStream) {
|
private void closeResources(InputStream inputStream, XWPFDocument docxDocument, ByteArrayOutputStream outputStream) {
|
||||||
try {
|
try {
|
||||||
if (inputStream != null) inputStream.close();
|
if (inputStream != null) inputStream.close();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -972,13 +1121,13 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public java.io.InputStream getInputStream() throws IOException {
|
public InputStream getInputStream() throws IOException {
|
||||||
return new java.io.ByteArrayInputStream(content);
|
return new ByteArrayInputStream(content);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void transferTo(File dest) throws IOException, IllegalStateException {
|
public void transferTo(File dest) throws IOException, IllegalStateException {
|
||||||
try (java.io.FileOutputStream fos = new java.io.FileOutputStream(dest)) {
|
try (FileOutputStream fos = new FileOutputStream(dest)) {
|
||||||
fos.write(content);
|
fos.write(content);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,6 +61,8 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {
|
||||||
List<RecordDto> recordDtos = Lists.newArrayList();
|
List<RecordDto> recordDtos = Lists.newArrayList();
|
||||||
log.info("datasetPath:{}", datasetPath);
|
log.info("datasetPath:{}", datasetPath);
|
||||||
log.info("apiKey:{}", apiKey);
|
log.info("apiKey:{}", apiKey);
|
||||||
|
|
||||||
|
//同步查询,我需要异步
|
||||||
for (String datasetId : datasetIds) {
|
for (String datasetId : datasetIds) {
|
||||||
List<RecordDto> recordDtoList = RetrievalUtil.getRetrieval(datasetPath, apiKey, datasetId, knowledgeBaseDto);
|
List<RecordDto> recordDtoList = RetrievalUtil.getRetrieval(datasetPath, apiKey, datasetId, knowledgeBaseDto);
|
||||||
if (recordDtoList != null && !recordDtoList.isEmpty()) {
|
if (recordDtoList != null && !recordDtoList.isEmpty()) {
|
||||||
|
|
|
@ -58,11 +58,13 @@ public class TraceSourceServiceImpl implements TraceSourceService {
|
||||||
TraceResult traceResult;
|
TraceResult traceResult;
|
||||||
try {
|
try {
|
||||||
String key = Constants.TRACE + Constants.SYMBOL_SEMICOLON + sysMessageId;
|
String key = Constants.TRACE + Constants.SYMBOL_SEMICOLON + sysMessageId;
|
||||||
|
Object o = redisTemplate.opsForValue().get(key);
|
||||||
//从redis中获取
|
//从redis中获取
|
||||||
traceResult = (TraceResult) redisTemplate.opsForValue().get(key);
|
traceResult = (TraceResult) o;
|
||||||
if (traceResult != null) {
|
if (traceResult != null) {
|
||||||
return traceResult;
|
return traceResult;
|
||||||
}
|
}
|
||||||
|
log.error("溯源文件为空 sysMessageId:{}", sysMessageId);
|
||||||
return null;
|
return null;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("溯源文件异常e:", e);
|
log.error("溯源文件异常e:", e);
|
||||||
|
|
Loading…
Reference in New Issue