diff --git a/chat-server/src/main/java/com/bjtds/brichat/entity/dify/DifyGetMetadatasRes.java b/chat-server/src/main/java/com/bjtds/brichat/entity/dify/DifyGetMetadatasRes.java
new file mode 100644
index 0000000..3cda4a8
--- /dev/null
+++ b/chat-server/src/main/java/com/bjtds/brichat/entity/dify/DifyGetMetadatasRes.java
@@ -0,0 +1,22 @@
+package com.bjtds.brichat.entity.dify;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import lombok.Data;
+
+import java.util.List;
+
+/**
+ * Response body returned when the metadata fields of a dataset are listed.
+ */
+@Data
+public class DifyGetMetadatasRes {
+
+    /** Metadata fields of the dataset; bound to the JSON property {@code doc_metadata}. */
+    @JsonProperty("doc_metadata")
+    private List<DifyMetadata> docMetadatas;
+
+    /** Bound to the JSON property {@code built_in_field_enabled}. */
+    @JsonProperty("built_in_field_enabled")
+    private Boolean builtInFieldEnabled;
+
+}
diff --git a/chat-server/src/main/java/com/bjtds/brichat/entity/dify/DifyMatedataAnnoReq.java b/chat-server/src/main/java/com/bjtds/brichat/entity/dify/DifyMatedataAnnoReq.java
new file mode 100644
index 0000000..8d72a88
--- /dev/null
+++ b/chat-server/src/main/java/com/bjtds/brichat/entity/dify/DifyMatedataAnnoReq.java
@@ -0,0 +1,21 @@
+package com.bjtds.brichat.entity.dify;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import lombok.Data;
+
+import java.util.List;
+
+/**
+ * One document-to-metadata assignment: the target document plus the metadata entries to set on it.
+ */
+@Data
+public class DifyMatedataAnnoReq {
+
+    /** Id of the document to annotate; bound to the JSON property {@code document_id}. */
+    @JsonProperty("document_id")
+    private String documentId;
+
+    /** Metadata entries to set on the document; bound to the JSON property {@code metadata_list}. */
+    @JsonProperty("metadata_list")
+    private List<DifyMetadata> metadataList;
+}
diff --git a/chat-server/src/main/java/com/bjtds/brichat/entity/dify/DifyMetadata.java b/chat-server/src/main/java/com/bjtds/brichat/entity/dify/DifyMetadata.java
new file mode 100644
index 0000000..c5ca732
--- /dev/null
+++ b/chat-server/src/main/java/com/bjtds/brichat/entity/dify/DifyMetadata.java
@@ -0,0 +1,31 @@
+package com.bjtds.brichat.entity.dify;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+// NOTE(review): OnOpen is not referenced anywhere in this class; the import looks accidental.
+import javax.websocket.OnOpen;
+
+/**
+ * A single metadata field of a dataset, optionally carrying the value assigned to a document.
+ */
+@Data
+@Builder
+@AllArgsConstructor
+@NoArgsConstructor
+public class DifyMetadata {
+    /** Identifier of the metadata field. */
+    private String id;
+    /** Value type of the field, e.g. {@code "string"}. */
+    private String type;
+    /** Name of the field, e.g. {@code "source_url"}. */
+    private String name;
+    /** Value assigned to a document for this field. */
+    private String value;
+    /** Bound to the JSON property {@code use_count}. */
+    @JsonProperty("use_count")
+    private Integer useCount;
+}
diff --git a/chat-server/src/main/java/com/bjtds/brichat/service/dify/DifyDatasetApiService.java b/chat-server/src/main/java/com/bjtds/brichat/service/dify/DifyDatasetApiService.java
index b99ce58..baf5560 100644
--- a/chat-server/src/main/java/com/bjtds/brichat/service/dify/DifyDatasetApiService.java
+++ b/chat-server/src/main/java/com/bjtds/brichat/service/dify/DifyDatasetApiService.java
@@ -1,8 +1,7 @@
 package com.bjtds.brichat.service.dify;
 
 import com.bjtds.brichat.entity.dataset.DocumentUploadReq;
-import com.bjtds.brichat.entity.dify.DatasetDto;
-import com.bjtds.brichat.entity.dify.DifyDatasetResponse;
+import com.bjtds.brichat.entity.dify.*;
 import org.springframework.http.ResponseEntity;
 import org.springframework.web.multipart.MultipartFile;
 
@@ -24,4 +23,31 @@ public interface DifyDatasetApiService{
     ResponseEntity createDocumentByFile(
             DocumentUploadReq request,
             MultipartFile file) throws IOException;
+
+    /**
+     * Adds a metadata field to a knowledge base (dataset).
+     *
+     * @param datasetId id of the dataset that receives the field
+     * @param metadata  definition of the field to create
+     * @return the HTTP response carrying the created metadata field
+     */
+    ResponseEntity<DifyMetadata> addMetadata(String datasetId, DifyMetadata metadata);
+
+    /**
+     * Lists the metadata fields of a knowledge base (dataset).
+     *
+     * @param datasetId id of the dataset to query
+     * @return the HTTP response carrying the dataset's metadata fields
+     */
+    ResponseEntity<DifyGetMetadatasRes> getMetadatas(String datasetId);
+
+    /**
+     * Assigns metadata values to a document.
+     *
+     * @param datasetId       id of the dataset that owns the document
+     * @param metadataAnnoReq the document id and the metadata entries to set on it
+     * @return the HTTP response carrying a result map
+     */
+    ResponseEntity<Map<String, Object>> setMetadataForDoc(String datasetId, DifyMatedataAnnoReq metadataAnnoReq);
+
 }
diff --git a/chat-server/src/main/java/com/bjtds/brichat/service/dify/impl/DifyDatasetApiServiceImpl.java b/chat-server/src/main/java/com/bjtds/brichat/service/dify/impl/DifyDatasetApiServiceImpl.java
index ae9ccda..309979f 100644
--- a/chat-server/src/main/java/com/bjtds/brichat/service/dify/impl/DifyDatasetApiServiceImpl.java
+++ b/chat-server/src/main/java/com/bjtds/brichat/service/dify/impl/DifyDatasetApiServiceImpl.java
@@ -1,9 +1,9 @@
 package com.bjtds.brichat.service.dify.impl;
 
+import cn.hutool.json.JSONUtil;
 import com.bjtds.brichat.entity.dataset.DocumentUploadReq;
 import com.bjtds.brichat.entity.dataset.RetrievalModel;
-import com.bjtds.brichat.entity.dify.DatasetDto;
-import com.bjtds.brichat.entity.dify.DifyDatasetResponse;
+import com.bjtds.brichat.entity.dify.*;
 import com.bjtds.brichat.entity.dto.PdfConversionResponse;
 import com.bjtds.brichat.entity.dto.PdfTaskDto;
 import com.bjtds.brichat.mapper.postgresql.DifyDatasetsMapper;
@@ -14,11 +14,17 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import io.github.guoshiqiufeng.dify.dataset.DifyDataset;
 import io.github.guoshiqiufeng.dify.dataset.dto.request.DatasetInfoRequest;
 import io.github.guoshiqiufeng.dify.dataset.dto.response.DatasetInfoResponse;
+import io.github.guoshiqiufeng.dify.dataset.dto.response.UploadFileInfoResponse;
+import org.apache.commons.io.IOUtils;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Range;
 import org.apache.poi.xwpf.usermodel.XWPFDocument;
 import org.apache.poi.xwpf.usermodel.XWPFParagraph;
 import org.apache.poi.xwpf.usermodel.XWPFRun;
+import org.docx4j.openpackaging.exceptions.Docx4JException;
 import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -37,9 +43,7 @@ import org.springframework.web.client.RestTemplate;
 import org.springframework.web.multipart.MultipartFile;
 
 import javax.annotation.Resource;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.IOException;
+import java.io.*;
 import java.util.*;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
@@ -85,6 +89,104 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
     @Resource
     private DifyDatasetsMapper difyDatasetsMapper;
 
+    /**
+     * Creates a metadata field on a dataset by POSTing the field's type and name.
+     *
+     * @param datasetId id of the dataset that receives the field
+     * @param metadata  field definition; only its type and name are sent
+     * @return the response of the metadata endpoint, read as a {@link DifyMetadata}
+     */
+    @Override
+    public ResponseEntity<DifyMetadata> addMetadata(String datasetId, DifyMetadata metadata) {
+        String url = difyUrl + Constants.DATABASE_API + "/" + datasetId + "/metadata";
+
+        HttpHeaders headers = new HttpHeaders();
+        headers.set("Authorization", Constants.BEARER + apiKey); // Bearer authentication
+        headers.setContentType(MediaType.APPLICATION_JSON);
+
+        // The JSON body carries only the field's type and name.
+        Map<String, Object> requestBody = new HashMap<>();
+        requestBody.put("type", metadata.getType());
+        requestBody.put("name", metadata.getName());
+        HttpEntity<Map<String, Object>> requestEntity = new HttpEntity<>(requestBody, headers);
+
+        return restTemplate.exchange(
+                url,
+                HttpMethod.POST,
+                requestEntity,
+                new ParameterizedTypeReference<DifyMetadata>() {} // retains the generic response type
+        );
+    }
+
+    /**
+     * Fetches the metadata fields defined on a dataset.
+     *
+     * @param datasetId id of the dataset to query
+     * @return the response of the metadata endpoint, read as a {@link DifyGetMetadatasRes}
+     */
+    @Override
+    public ResponseEntity<DifyGetMetadatasRes> getMetadatas(String datasetId) {
+        String url = difyUrl + Constants.DATABASE_API + "/" + datasetId + "/metadata";
+
+        HttpHeaders headers = new HttpHeaders();
+        headers.set("Authorization", Constants.BEARER + apiKey); // Bearer authentication
+        headers.setContentType(MediaType.APPLICATION_JSON);
+        HttpEntity<Void> requestEntity = new HttpEntity<>(headers);
+
+        return restTemplate.exchange(
+                url,
+                HttpMethod.GET,
+                requestEntity,
+                new ParameterizedTypeReference<DifyGetMetadatasRes>() {} // retains the generic response type
+        );
+    }
+
+    /**
+     * Assigns metadata values to one document of a dataset.
+     *
+     * <p>Failures of the upstream call are logged and reported as a 500 response, not rethrown.
+     *
+     * @param datasetId       id of the dataset that owns the document
+     * @param metadataAnnoReq document id plus the metadata entries to set on it
+     * @return a map with {@code success}, {@code message} and, on success, the raw {@code response} body
+     */
+    @Override
+    public ResponseEntity<Map<String, Object>> setMetadataForDoc(String datasetId, DifyMatedataAnnoReq metadataAnnoReq) {
+        String url = difyUrl + Constants.DATABASE_API + "/" + datasetId + "/documents/metadata";
+
+        HttpHeaders headers = new HttpHeaders();
+        headers.set("Authorization", Constants.BEARER + apiKey); // Bearer authentication
+        headers.setContentType(MediaType.APPLICATION_JSON);
+
+        // The endpoint expects operation_data to be an array, even for a single document.
+        Map<String, Object> requestBody = new HashMap<>();
+        requestBody.put("operation_data", Collections.singletonList(metadataAnnoReq));
+        HttpEntity<Map<String, Object>> requestEntity = new HttpEntity<>(requestBody, headers);
+
+        try {
+            // Read the response as a plain String and wrap it in the result map by hand.
+            ResponseEntity<String> response = restTemplate.exchange(
+                    url,
+                    HttpMethod.POST,
+                    requestEntity,
+                    String.class
+            );
+
+            Map<String, Object> resultMap = new HashMap<>();
+            resultMap.put("success", true);
+            resultMap.put("message", "元数据设置成功");
+            resultMap.put("response", response.getBody());
+
+            return ResponseEntity.status(response.getStatusCode()).body(resultMap);
+        } catch (Exception e) {
+            logger.error("设置文档元数据失败: {}", e.getMessage(), e);
+            Map<String, Object> errorMap = new HashMap<>();
+            errorMap.put("success", false);
+            errorMap.put("message", "元数据设置失败: " + e.getMessage());
+            return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMap);
+        }
+    }
+
     @Override
     public ResponseEntity createDataset(String name, String description) {
         // 1. 设置请求URL
@@ -190,6 +292,9 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
             // 2. 执行正常文件上传逻辑
             return handleNormalFile(request, file);
         }
+        // 3. Annotate the file with its preview-path metadata
+
+
     }
 
     /**
@@ -306,18 +411,62 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
 
         // 发送请求
         try {
-            return restTemplate.exchange(
+            ResponseEntity<Map> exchange = restTemplate.exchange(
                     url,
                     HttpMethod.POST,
                     requestEntity,
                     Map.class,
                     uriVariables
             );
+            // 3. Attach the source_url metadata. Best effort: the upload has already succeeded here.
+            try {
+                Map<String, String> document = (Map<String, String>) exchange.getBody().get("document");
+                String documentId = document.get("id");
+                UploadFileInfoResponse fileInfo = difyDatasetService.uploadFileInfo(request.getDatasetId(), documentId);
+                setSourceUrlMatedata(request.getDatasetId(), documentId, difyUrl + fileInfo.getUrl());
+            } catch (Exception metadataError) {
+                logger.warn("文档上传成功,但设置source_url元数据失败: {}", metadataError.getMessage(), metadataError);
+            }
+            return exchange;
         } catch (Exception e) {
             throw new RuntimeException("文档上传失败: " + e.getMessage(), e);
         }
     }
 
+    /**
+     * Best effort: stores sourceUrl as the document's source_url metadata, creating the field if missing.
+     */
+    private void setSourceUrlMatedata(String datasetId, String documentId, String sourceUrl) {
+        try {
+            // Pick the field by name; taking the dataset's first field could tag an unrelated one.
+            DifyGetMetadatasRes existing = getMetadatas(datasetId).getBody();
+            List<DifyMetadata> docMetadatas = existing == null ? null : existing.getDocMetadatas();
+            DifyMetadata metadata = docMetadatas == null ? null : docMetadatas.stream()
+                    .filter(m -> "source_url".equals(m.getName()))
+                    .findFirst().orElse(null);
+            if (metadata == null) {
+                metadata = addMetadata(datasetId, DifyMetadata.builder().type("string").name("source_url").build()).getBody();
+            }
+            if (metadata == null) {
+                logger.warn("为文档 {} 设置source_url元数据失败,但文档上传成功: {}", documentId, "source_url metadata field is unavailable");
+                return;
+            }
+            metadata.setValue(sourceUrl);
+            DifyMatedataAnnoReq metadataAnnoReq = new DifyMatedataAnnoReq();
+            metadataAnnoReq.setDocumentId(documentId);
+            metadataAnnoReq.setMetadataList(Collections.singletonList(metadata));
+            // setMetadataForDoc signals failure through the status code instead of throwing.
+            ResponseEntity<Map<String, Object>> result = setMetadataForDoc(datasetId, metadataAnnoReq);
+            if (result.getStatusCode().is2xxSuccessful()) {
+                logger.info("为文档 {} 设置source_url元数据成功", documentId);
+            } else {
+                logger.warn("为文档 {} 设置source_url元数据失败,但文档上传成功: {}", documentId, result.getBody());
+            }
+        } catch (Exception e) {
+            logger.warn("为文档 {} 设置source_url元数据失败,但文档上传成功: {}", documentId, e.getMessage());
+        }
+    }
+
     /**
      * 调用PDF转换服务
      */
@@ -414,8 +563,8 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
 
                 // 读取转换后的文件内容 - Java 8兼容
                 byte[] docxBytes;
-                try (java.io.FileInputStream fis = new java.io.FileInputStream(tempFile)) {
-                    docxBytes = org.apache.commons.io.IOUtils.toByteArray(fis);
+                try (FileInputStream fis = new FileInputStream(tempFile)) {
+                    docxBytes = IOUtils.toByteArray(fis);
                 }
 
                 logger.info("docx4j转换成功: {} -> {} (大小: {} -> {} bytes)",
@@ -440,7 +589,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
                 }
             }
 
-        } catch (org.docx4j.openpackaging.exceptions.Docx4JException e) {
+        } catch (Docx4JException e) {
             logger.warn("docx4j无法识别文件格式,可能是较旧的DOC格式: {}", e.getMessage());
             return handleOlderDocFormat(docFile, originalFilename);
         } catch (Exception e) {
@@ -520,7 +669,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
 
         try {
             // 读取DOC文档
-            try (java.io.InputStream inputStream = docFile.getInputStream()) {
+            try (InputStream inputStream = docFile.getInputStream()) {
                 docDocument = new HWPFDocument(inputStream);
             }
 
@@ -561,7 +710,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
     private void convertDocContentToDocx(HWPFDocument docDocument, XWPFDocument docxDocument) {
         try {
             // 获取文档范围
-            org.apache.poi.hwpf.usermodel.Range documentRange = docDocument.getRange();
+            Range documentRange = docDocument.getRange();
 
             // 按段落处理
             int numParagraphs = documentRange.numParagraphs();
@@ -569,7 +718,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
 
             for (int i = 0; i < numParagraphs; i++) {
                 try {
-                    org.apache.poi.hwpf.usermodel.Paragraph hwpfParagraph = documentRange.getParagraph(i);
+                    Paragraph hwpfParagraph = documentRange.getParagraph(i);
                     String paragraphText = hwpfParagraph.text();
 
                     // 跳过空段落和只包含控制字符的段落
@@ -585,7 +734,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
                     int numCharacterRuns = hwpfParagraph.numCharacterRuns();
                     for (int j = 0; j < numCharacterRuns; j++) {
                         try {
-                            org.apache.poi.hwpf.usermodel.CharacterRun characterRun = hwpfParagraph.getCharacterRun(j);
+                            CharacterRun characterRun = hwpfParagraph.getCharacterRun(j);
                             String runText = characterRun.text();
 
                             if (runText != null && !runText.trim().isEmpty()) {
@@ -744,7 +893,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
     private String extractTextFromDocSimple(MultipartFile docFile) {
         try {
             // 尝试使用HWPFDocument提取文本(最基本的方式)
-            try (java.io.InputStream inputStream = docFile.getInputStream()) {
+            try (InputStream inputStream = docFile.getInputStream()) {
                 HWPFDocument docDocument = new HWPFDocument(inputStream);
                 WordExtractor extractor = new WordExtractor(docDocument);
                 String text = extractor.getText();
@@ -765,7 +914,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
     private byte[] convertDocToDocxUsingPOI(MultipartFile docFile) throws Exception {
         ByteArrayOutputStream baos = null;
         XWPFDocument docxDocument = null;
-        java.io.InputStream docInputStream = null;
+        InputStream docInputStream = null;
 
         try {
             // 创建输入流
@@ -802,7 +951,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
     /**
      * 从DOC文件中提取文本
      */
-    private String extractTextFromDoc(java.io.InputStream docInputStream) throws Exception {
+    private String extractTextFromDoc(InputStream docInputStream) throws Exception {
         HWPFDocument docDocument = null;
         WordExtractor extractor = null;
 
@@ -811,8 +960,8 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
             if (!docInputStream.markSupported()) {
                 // 如果不支持mark,将流转换为ByteArrayInputStream
                 // Java 8兼容:使用Apache Commons IO读取所有字节
-                byte[] bytes = org.apache.commons.io.IOUtils.toByteArray(docInputStream);
-                docInputStream = new java.io.ByteArrayInputStream(bytes);
+                byte[] bytes = IOUtils.toByteArray(docInputStream);
+                docInputStream = new ByteArrayInputStream(bytes);
             }
             docInputStream.mark(Integer.MAX_VALUE);
 
@@ -842,11 +991,11 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
     /**
      * 备用的文本提取方法
      */
-    private String extractTextWithAlternativeMethod(java.io.InputStream docInputStream) throws Exception {
+    private String extractTextWithAlternativeMethod(InputStream docInputStream) throws Exception {
         try {
             // 尝试使用更宽松的方式读取
             // Java 8兼容:使用Apache Commons IO读取所有字节
-            byte[] docBytes = org.apache.commons.io.IOUtils.toByteArray(docInputStream);
+            byte[] docBytes = IOUtils.toByteArray(docInputStream);
 
             // 简单的文本提取 - 寻找可能的文本内容
             String content = new String(docBytes, "UTF-8");
@@ -905,7 +1054,7 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
     /**
      * 安全关闭资源
      */
-    private void closeResources(java.io.InputStream inputStream, XWPFDocument docxDocument, ByteArrayOutputStream outputStream) {
+    private void closeResources(InputStream inputStream, XWPFDocument docxDocument, ByteArrayOutputStream outputStream) {
         try {
             if (inputStream != null) inputStream.close();
         } catch (Exception e) {
@@ -972,13 +1121,13 @@ public class DifyDatasetApiServiceImpl implements DifyDatasetApiService {
         }
 
         @Override
-        public java.io.InputStream getInputStream() throws IOException {
-            return new java.io.ByteArrayInputStream(content);
+        public InputStream getInputStream() throws IOException {
+            return new ByteArrayInputStream(content);
         }
 
         @Override
         public void transferTo(File dest) throws IOException, IllegalStateException {
-            try (java.io.FileOutputStream fos = new java.io.FileOutputStream(dest)) {
+            try (FileOutputStream fos = new FileOutputStream(dest)) {
                 fos.write(content);
             }
         }
diff --git a/chat-server/src/main/java/com/bjtds/brichat/service/impl/KnowledgeBaseServiceImpl.java b/chat-server/src/main/java/com/bjtds/brichat/service/impl/KnowledgeBaseServiceImpl.java
index 85fa374..447f7e6 100644
--- a/chat-server/src/main/java/com/bjtds/brichat/service/impl/KnowledgeBaseServiceImpl.java
+++ b/chat-server/src/main/java/com/bjtds/brichat/service/impl/KnowledgeBaseServiceImpl.java
@@ -61,6 +61,7 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {
         List recordDtos = Lists.newArrayList();
         log.info("datasetPath:{}", datasetPath);
-        log.info("apiKey:{}", apiKey);
+
+        // TODO: the datasets are queried one after another here; this retrieval should run asynchronously.
         for (String datasetId : datasetIds) {
             List recordDtoList = RetrievalUtil.getRetrieval(datasetPath, apiKey, datasetId, knowledgeBaseDto);
             if (recordDtoList != null && !recordDtoList.isEmpty()) {
diff --git a/chat-server/src/main/java/com/bjtds/brichat/service/impl/TraceSourceServiceImpl.java b/chat-server/src/main/java/com/bjtds/brichat/service/impl/TraceSourceServiceImpl.java
index cab53ac..638e6dd 100644
--- a/chat-server/src/main/java/com/bjtds/brichat/service/impl/TraceSourceServiceImpl.java
+++ b/chat-server/src/main/java/com/bjtds/brichat/service/impl/TraceSourceServiceImpl.java
@@ -58,11 +58,13 @@ public class TraceSourceServiceImpl implements TraceSourceService {
         TraceResult traceResult;
         try {
             String key = Constants.TRACE + Constants.SYMBOL_SEMICOLON + sysMessageId;
+            Object cached = redisTemplate.opsForValue().get(key);
             //从redis中获取
-            traceResult = (TraceResult) redisTemplate.opsForValue().get(key);
+            traceResult = (TraceResult) cached;
             if (traceResult != null) {
                 return traceResult;
             }
+            log.error("溯源文件为空 sysMessageId:{}", sysMessageId);
             return null;
         } catch (Exception e) {
             log.error("溯源文件异常e:", e);