完善es模块,支持es单文件上传和多文件上传,并与dify数据表相关联;使用前请修改es的文件路径
This commit is contained in:
parent
40eaa8e7c0
commit
867b39e6b4
|
@ -166,3 +166,9 @@ export function EsSearch(data: string){
|
|||
params: {keyword}
|
||||
})
|
||||
}
|
||||
export function createAllIndex(){
|
||||
return request({
|
||||
url:"/brichat-service/knowledge-base/createAllIndex",
|
||||
method:"post"
|
||||
})
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
<h1>智能检索</h1>
|
||||
</div>
|
||||
|
||||
|
||||
<!-- 搜索框 -->
|
||||
<div class="search-box-container">
|
||||
<div class="search-box">
|
||||
|
@ -23,6 +24,11 @@
|
|||
<div v-else class="loading-spinner"></div>
|
||||
</button>
|
||||
</div>
|
||||
<div class="search-actions">
|
||||
<button class="advanced-search-btn" @click="handleCreateIndex">
|
||||
创建索引
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<!-- 搜索方法选择 -->
|
||||
<div class="search-methods">
|
||||
|
@ -257,7 +263,7 @@
|
|||
|
||||
<script setup lang="ts">
|
||||
import { ref, reactive, computed } from 'vue'
|
||||
import { retrieval,EsSearch } from '@/api/dataset'
|
||||
import { retrieval,EsSearch,createAllIndex } from '@/api/dataset'
|
||||
import { ElMessage, ElNotification, ElAlert } from 'element-plus'
|
||||
import { View, Download } from '@element-plus/icons-vue'
|
||||
|
||||
|
@ -293,6 +299,19 @@ const previewFileUrl = ref('')
|
|||
const previewTextContent = ref('')
|
||||
const previewMarkdownContent = ref('')
|
||||
|
||||
// Click handler for the "创建索引" button: calls the createAllIndex API and
// shows a success or failure toast. The shared `loading` flag is raised for
// the duration of the request.
const handleCreateIndex = async () => {
  loading.value = true
  try {
    await createAllIndex()
    ElMessage.success('索引创建成功')
  } catch (error) {
    ElMessage.error('索引创建失败')
  } finally {
    // Single reset point: runs after both the success and the failure path,
    // so no separate reset is needed inside the try block.
    loading.value = false
  }
}
|
||||
// 搜索处理函数
|
||||
const handleSearch = async () => {
|
||||
if (!searchQuery.value.trim()) {
|
||||
|
@ -1248,4 +1267,38 @@ const errorHandler = () => {
|
|||
font-size: 13px;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
.search-actions {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
margin-top: 16px;
|
||||
}
|
||||
|
||||
.advanced-search-btn {
|
||||
background: linear-gradient(135deg, #3b82f6, #2563eb);
|
||||
color: white;
|
||||
border: none;
|
||||
padding: 8px 16px;
|
||||
border-radius: 8px;
|
||||
cursor: pointer;
|
||||
font-size: 14px;
|
||||
font-weight: 500;
|
||||
transition: all 0.2s ease;
|
||||
box-shadow: 0 2px 6px rgba(37, 99, 235, 0.3);
|
||||
}
|
||||
|
||||
.advanced-search-btn:hover {
|
||||
background: linear-gradient(135deg, #2563eb, #1d4ed8);
|
||||
transform: translateY(-2px);
|
||||
box-shadow: 0 4px 12px rgba(37, 99, 235, 0.4);
|
||||
}
|
||||
|
||||
.advanced-search-btn:active {
|
||||
transform: translateY(0);
|
||||
box-shadow: 0 2px 6px rgba(37, 99, 235, 0.3);
|
||||
}
|
||||
|
||||
|
||||
|
||||
</style>
|
||||
|
|
|
@ -1,44 +0,0 @@
|
|||
package com.bjtds.brichat.config;
|
||||
|
||||
import com.bjtds.brichat.service.EsKnowledgeService;
|
||||
import com.bjtds.brichat.service.dify.DifyDocumentsService;
|
||||
import com.bjtds.brichat.service.impl.EsKnowledgeServiceImpl;
|
||||
import com.bjtds.brichat.util.EsKnowledgeImporter;
|
||||
import org.apache.logging.log4j.core.config.Order;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.CommandLineRunner;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
//@Component
|
||||
//@Order(3)
|
||||
public class EsStartupImporter implements CommandLineRunner {
|
||||
|
||||
@Value("${elasticsearch.dirPath}")
|
||||
private String dirPath;
|
||||
|
||||
@Value("${elasticsearch.deleteIndex}")
|
||||
private Boolean deleteIndex;
|
||||
|
||||
@Autowired
|
||||
private EsKnowledgeServiceImpl knowledgeService;
|
||||
|
||||
@Autowired
|
||||
private DifyDocumentsService difyDocumentsService;
|
||||
|
||||
@Override
|
||||
public void run(String... args) throws Exception {
|
||||
// 先删除索引
|
||||
if (deleteIndex) {
|
||||
knowledgeService.deleteIndex();
|
||||
}
|
||||
|
||||
// 再创建索引
|
||||
knowledgeService.createIndex();
|
||||
|
||||
// 指定目录,Spring Boot 启动时自动导入
|
||||
EsKnowledgeImporter importer = new EsKnowledgeImporter(knowledgeService, difyDocumentsService);
|
||||
importer.importFiles(dirPath);
|
||||
System.out.println("Spring Boot 启动完成,知识库批量导入完成!");
|
||||
}
|
||||
}
|
|
@ -6,8 +6,10 @@ import com.bjtds.brichat.entity.dataset.TUserDataset;
|
|||
import com.bjtds.brichat.entity.dataset.WorkflowDatasetDto;
|
||||
import com.bjtds.brichat.entity.dto.KnowledgeBaseDto;
|
||||
import com.bjtds.brichat.entity.dto.RecordDto;
|
||||
import com.bjtds.brichat.service.EsKnowledgeService;
|
||||
import com.bjtds.brichat.service.KnowledgeBaseService;
|
||||
import com.bjtds.brichat.service.dify.DifyDocumentsService;
|
||||
import com.bjtds.brichat.service.impl.EsKnowledgeServiceImpl;
|
||||
import com.bjtds.brichat.util.EsKnowledgeImporter;
|
||||
import com.bjtds.brichat.util.ResultUtils;
|
||||
import io.swagger.annotations.Api;
|
||||
import io.swagger.annotations.ApiOperation;
|
||||
|
@ -15,7 +17,6 @@ import org.springframework.beans.factory.annotation.Autowired;
|
|||
import org.springframework.web.bind.annotation.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
@ -29,7 +30,14 @@ public class KnowledgeBaseController {
|
|||
@Autowired
|
||||
private KnowledgeBaseService knowledgeBaseService;
|
||||
@Autowired
|
||||
private EsKnowledgeService esKnowledgeService;
|
||||
private EsKnowledgeServiceImpl esKnowledgeService;
|
||||
|
||||
@Autowired
|
||||
private DifyDocumentsService difyDocumentsService;
|
||||
|
||||
@Autowired
|
||||
private EsKnowledgeImporter esKnowledgeImporter;
|
||||
|
||||
|
||||
@ApiOperation("返回检索数据")
|
||||
@PostMapping("/retrieval")
|
||||
|
@ -48,11 +56,34 @@ public class KnowledgeBaseController {
|
|||
}
|
||||
}
|
||||
|
||||
@ApiOperation("创建单个文件的索引")
|
||||
@PostMapping("/createIndex")
|
||||
public ResultUtils createIndex(@RequestParam("documentId") String documentId) throws Exception {
|
||||
|
||||
try{
|
||||
esKnowledgeImporter.importDocumentId(documentId);
|
||||
return ResultUtils.success("索引创建成功");
|
||||
} catch (IOException e) {
|
||||
return ResultUtils.error("索引创建失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
@ApiOperation("创建所有文件的索引")
|
||||
@PostMapping("/createAllIndex")
|
||||
public ResultUtils createAllIndex() throws Exception {
|
||||
try{
|
||||
esKnowledgeImporter.importAllDocuments();
|
||||
return ResultUtils.success("索引创建成功");
|
||||
} catch (IOException e) {
|
||||
return ResultUtils.error("索引创建失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@ApiOperation("返回关联表数据")
|
||||
@GetMapping("/getWorkflowAndDatasetTableData")
|
||||
public ResultUtils getWorkflowAndDatasetTableData() throws Exception {
|
||||
List<WorkflowDatasetDto> workflowDatasets = knowledgeBaseService.getWorkflowAndDatasetTableData();
|
||||
return ResultUtils.success(workflowDatasets);
|
||||
|
||||
}
|
||||
|
||||
@ApiOperation("根据 appId 获取 workflow graph 数据中的 dataset_ids")
|
||||
|
|
|
@ -7,9 +7,6 @@ import com.fasterxml.jackson.annotation.JsonIgnore;
|
|||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.Data;
|
||||
import org.apache.ibatis.type.JdbcType;
|
||||
import org.docx4j.wml.U;
|
||||
|
||||
import java.util.UUID;
|
||||
|
||||
@Data
|
||||
|
@ -37,6 +34,7 @@ public class Document {
|
|||
private int word_count;
|
||||
private int hit_count;
|
||||
private String doc_form;
|
||||
private DataSourceInfo dataSourceInfoObj;
|
||||
|
||||
// Getters and setters
|
||||
|
||||
|
@ -48,11 +46,13 @@ public class Document {
|
|||
private String source_url;
|
||||
@JsonProperty("key")
|
||||
private String key;
|
||||
@JsonProperty("name")
|
||||
private String name;
|
||||
// Getter and setter
|
||||
}
|
||||
|
||||
@JsonIgnore
|
||||
public DataSourceInfo getDataSourceInfoObj() {
|
||||
public DataSourceInfo getDataSourceInfoObjFromJson() {
|
||||
if (dataSourceInfo == null) return null;
|
||||
try {
|
||||
return new ObjectMapper().readValue(dataSourceInfo, DataSourceInfo.class);
|
||||
|
|
|
@ -19,7 +19,5 @@ public class KnowledgeDoc {
|
|||
private String fileType;
|
||||
private Date uploadTime;
|
||||
private List<String> dataset_id;
|
||||
private String upload_file_id;
|
||||
private String source_url;
|
||||
private String key;
|
||||
}
|
||||
|
|
|
@ -13,4 +13,10 @@ public interface DifyDocumentsMapper {
|
|||
List<Document> getDatasetIdsByName(String name);
|
||||
|
||||
Document.DataSourceInfo getFileSourceUrlByUploadFileId(UUID UploadFileId);
|
||||
|
||||
Document.DataSourceInfo getUploadFileById(UUID documentId);
|
||||
|
||||
Document getDocumentById(UUID documentId);
|
||||
|
||||
List<Document> getDocuments();
|
||||
}
|
||||
|
|
|
@ -12,5 +12,12 @@ public interface DifyDocumentsService {
|
|||
|
||||
Document.DataSourceInfo getFileSourceUrl(UUID UploadFileId);
|
||||
|
||||
Document.DataSourceInfo getFileInfo(UUID UploadFileId);
|
||||
|
||||
Document getDocumentById(UUID documentId);
|
||||
|
||||
Document getDocumentByIdWithFileInfo(UUID documentId);
|
||||
|
||||
List<Document> getDocuments();
|
||||
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@ import org.springframework.core.annotation.Order;
|
|||
import org.springframework.stereotype.Service;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
|
@ -27,4 +28,53 @@ public class DifyDocumentsServiceImpl implements DifyDocumentsService {
|
|||
public Document.DataSourceInfo getFileSourceUrl(UUID UploadFileId) {
|
||||
return difyDocumentsMapper.getFileSourceUrlByUploadFileId(UploadFileId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document.DataSourceInfo getFileInfo(UUID UploadFileId) {
|
||||
return difyDocumentsMapper.getUploadFileById(UploadFileId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getDocumentById(UUID documentId) {
|
||||
return difyDocumentsMapper.getDocumentById(documentId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getDocumentByIdWithFileInfo(UUID documentId) {
|
||||
Document document = getDocumentById(documentId);
|
||||
if (document != null && document.getDataSourceInfoObj() != null) {
|
||||
UUID uploadFileId = document.getDataSourceInfoObjFromJson().getUpload_file_id();
|
||||
if (uploadFileId != null) {
|
||||
Document.DataSourceInfo fileInfo = getFileInfo(uploadFileId);
|
||||
if (fileInfo != null) {
|
||||
document.setDataSourceInfoObj(fileInfo);
|
||||
}
|
||||
}
|
||||
}
|
||||
return document;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Document> getDocuments() {
|
||||
// 查询所有文档
|
||||
List<Document> documents = difyDocumentsMapper.getDocuments();
|
||||
|
||||
if (documents != null && !documents.isEmpty()) {
|
||||
for (Document document : documents) {
|
||||
if (document.getDataSourceInfoObjFromJson() != null) {
|
||||
UUID uploadFileId = document.getDataSourceInfoObjFromJson().getUpload_file_id();
|
||||
if (uploadFileId != null) {
|
||||
// 查询对应文件信息
|
||||
Document.DataSourceInfo fileInfo = getFileInfo(uploadFileId);
|
||||
if (fileInfo != null) {
|
||||
document.setDataSourceInfoObj(fileInfo);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return documents;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -62,10 +62,10 @@ public class EsKnowledgeServiceImpl implements EsKnowledgeService {
|
|||
@Override
|
||||
public void addDoc(KnowledgeDoc doc) throws IOException {
|
||||
// 先检查索引中是否已存在该文件
|
||||
if (existsDoc(doc.getFilePath())) {
|
||||
System.out.println("文件已存在索引中,跳过: " + doc.getFilePath());
|
||||
return;
|
||||
}
|
||||
// if (existsDoc(doc.getFilePath())) {
|
||||
// System.out.println("文件已存在索引中,跳过: " + doc.getFilePath());
|
||||
// return;
|
||||
// }
|
||||
|
||||
File file = new File(doc.getFilePath());
|
||||
int cpuThreads = Runtime.getRuntime().availableProcessors();
|
||||
|
@ -92,8 +92,6 @@ public class EsKnowledgeServiceImpl implements EsKnowledgeService {
|
|||
doc.getFileType(),
|
||||
doc.getUploadTime(),
|
||||
doc.getDataset_id(),
|
||||
doc.getUpload_file_id(),
|
||||
doc.getSource_url(),
|
||||
doc.getKey()
|
||||
);
|
||||
|
||||
|
@ -123,8 +121,6 @@ public class EsKnowledgeServiceImpl implements EsKnowledgeService {
|
|||
doc.getFileType(),
|
||||
doc.getUploadTime(),
|
||||
doc.getDataset_id(),
|
||||
doc.getUpload_file_id(),
|
||||
doc.getSource_url(),
|
||||
doc.getKey()
|
||||
);
|
||||
|
||||
|
@ -206,9 +202,7 @@ public class EsKnowledgeServiceImpl implements EsKnowledgeService {
|
|||
item.put("fileType", d.getFileType());
|
||||
item.put("score", normalizedScore);
|
||||
item.put("dataset_id", d.getDataset_id());
|
||||
item.put("upload_file_id", d.getUpload_file_id());
|
||||
item.put("key", d.getKey());
|
||||
item.put("source_url", d.getSource_url());
|
||||
|
||||
if (hit.highlight() != null) {
|
||||
String content = String.join(" ... ", hit.highlight().getOrDefault("content", Collections.emptyList()));
|
||||
|
|
|
@ -1,24 +1,34 @@
|
|||
package com.bjtds.brichat.util;
|
||||
|
||||
|
||||
import com.bjtds.brichat.entity.dify.Document;
|
||||
import com.bjtds.brichat.entity.esmodel.KnowledgeDoc;
|
||||
import com.bjtds.brichat.mapper.postgresql.DifyDocumentsMapper;
|
||||
import com.bjtds.brichat.service.dify.DifyDocumentsService;
|
||||
import com.bjtds.brichat.service.dify.impl.DifyDocumentsServiceImpl;
|
||||
import com.bjtds.brichat.service.impl.EsKnowledgeServiceImpl;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import org.checkerframework.checker.units.qual.K;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
||||
@Service
|
||||
public class EsKnowledgeImporter {
|
||||
|
||||
private final EsKnowledgeServiceImpl knowledgeService;
|
||||
private final DifyDocumentsService difyDocumentsService;
|
||||
private final DifyDocumentsService difyDocumentsService;
|
||||
private static final Logger log = LoggerFactory.getLogger(EsKnowledgeImporter.class);
|
||||
|
||||
@Value("${elasticsearch.dirPath}")
|
||||
private String indexPath;
|
||||
|
||||
|
||||
|
||||
|
||||
public EsKnowledgeImporter(EsKnowledgeServiceImpl knowledgeService, DifyDocumentsService difyDocumentsService) {
|
||||
|
@ -26,70 +36,184 @@ public class EsKnowledgeImporter {
|
|||
this.difyDocumentsService = difyDocumentsService;
|
||||
}
|
||||
|
||||
// 批量导入目录下的文件(包含子目录)
|
||||
public void importFiles(String dirPath) throws IOException {
|
||||
File dir = new File(dirPath);
|
||||
if (!dir.exists() || !dir.isDirectory()) {
|
||||
throw new IllegalArgumentException(dirPath + " 不是目录");
|
||||
|
||||
|
||||
// // 批量导入目录下的文件(包含子目录)
|
||||
// public void importFiles(String dirPath) throws IOException {
|
||||
// File dir = new File(dirPath);
|
||||
// if (!dir.exists() || !dir.isDirectory()) {
|
||||
// throw new IllegalArgumentException(dirPath + " 不是目录");
|
||||
// }
|
||||
// importDirectory(dir);
|
||||
// }
|
||||
|
||||
// 根据documentId导入文档(优化版,减少数据库查询)
|
||||
public void importDocumentId(String documentId) throws IOException {
|
||||
// 一次性获取完整的文档信息
|
||||
|
||||
Document document = difyDocumentsService.getDocumentByIdWithFileInfo(UUID.fromString(documentId));
|
||||
if (document == null) {
|
||||
throw new IllegalArgumentException("documentId 不存在");
|
||||
}
|
||||
|
||||
importDirectory(dir);
|
||||
Document.DataSourceInfo dataSourceInfo = document.getDataSourceInfoObj();
|
||||
if (dataSourceInfo == null) {
|
||||
throw new IllegalArgumentException("dataSourceInfo 不存在");
|
||||
}
|
||||
|
||||
UUID uploadFileId = dataSourceInfo.getUpload_file_id();
|
||||
if (uploadFileId == null) {
|
||||
throw new IllegalArgumentException("uploadFileId 不存在");
|
||||
}
|
||||
|
||||
String key = dataSourceInfo.getKey();
|
||||
String filePath = indexPath + key;
|
||||
File file = new File(filePath);
|
||||
if (!file.exists()) {
|
||||
throw new IllegalArgumentException(filePath + " 不存在");
|
||||
}
|
||||
|
||||
// 处理文件导入
|
||||
processFileWithDocumentInfo(file, document);
|
||||
}
|
||||
/**
|
||||
* 根据 Document 列表批量构建索引
|
||||
*/
|
||||
public void importAllDocuments() throws IOException {
|
||||
List<Document> documents = difyDocumentsService.getDocuments();
|
||||
if (documents == null || documents.isEmpty()) {
|
||||
throw new IllegalArgumentException("documents 列表为空");
|
||||
}
|
||||
|
||||
// 递归遍历目录
|
||||
private void importDirectory(File dir) throws IOException {
|
||||
for (Document document : documents) {
|
||||
if (document == null) continue;
|
||||
|
||||
Document.DataSourceInfo dataSourceInfo = document.getDataSourceInfoObj();
|
||||
if (dataSourceInfo == null) {
|
||||
log.warn("documentId={} 缺少 DataSourceInfo,跳过", document.getId());
|
||||
continue;
|
||||
}
|
||||
|
||||
File[] files = dir.listFiles();
|
||||
if (files == null) return;
|
||||
String key = dataSourceInfo.getKey();
|
||||
if (key == null || key.isEmpty()) {
|
||||
log.warn("documentId=" + document.getId() + " 的 key 为空,跳过");
|
||||
continue;
|
||||
}
|
||||
|
||||
for (File file : files) {
|
||||
if (file.isDirectory()) {
|
||||
// 递归子目录
|
||||
importDirectory(file);
|
||||
} else if (file.isFile()) {
|
||||
try {
|
||||
// 检查索引中是否已存在该文件
|
||||
if (knowledgeService.existsDoc(file.getAbsolutePath())) {
|
||||
System.out.println("文件已存在索引,跳过: " + file.getAbsolutePath());
|
||||
continue;
|
||||
}
|
||||
String filePath = indexPath + key;
|
||||
File file = new File(filePath);
|
||||
if (!file.exists()) {
|
||||
log.warn(filePath + " 不存在,跳过");
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
String content = EsFileParser.parseFile(file);
|
||||
String fileName = file.getName();
|
||||
List<Document> documents = difyDocumentsService.getDataset(fileName);
|
||||
List<String> datasetIds = documents.stream()
|
||||
.filter(document -> document.isEnabled())
|
||||
.map(document -> document.getDatasetId()) // 假设 Document 里有 getDatasetId()
|
||||
.collect(Collectors.toList());
|
||||
if (datasetIds == null || datasetIds.isEmpty()) {
|
||||
System.out.println("文件未关联数据集,跳过: " + file.getAbsolutePath());
|
||||
continue;
|
||||
}
|
||||
UUID uploadFileId = documents.get(0).getDataSourceInfoObj().getUpload_file_id();
|
||||
System.out.println(uploadFileId);
|
||||
Document.DataSourceInfo DatasourceInfo = difyDocumentsService.getFileSourceUrl(uploadFileId);
|
||||
String sourceUrl = DatasourceInfo.getSource_url();
|
||||
String key = DatasourceInfo.getKey();
|
||||
|
||||
KnowledgeDoc doc = new KnowledgeDoc();
|
||||
doc.setId(UUID.randomUUID().toString());
|
||||
doc.setTitle(file.getName());
|
||||
doc.setContent(content);
|
||||
doc.setFileName(file.getName());
|
||||
doc.setFilePath(file.getAbsolutePath());
|
||||
doc.setFileType(file.getName().substring(file.getName().lastIndexOf(".") + 1));
|
||||
doc.setUpload_file_id(uploadFileId.toString());
|
||||
doc.setDataset_id(datasetIds);
|
||||
doc.setSource_url(sourceUrl);
|
||||
doc.setKey(key);
|
||||
knowledgeService.addDoc(doc);
|
||||
System.out.println("导入成功: " + file.getAbsolutePath());
|
||||
} catch (Exception e) {
|
||||
System.err.println("导入失败: " + file.getAbsolutePath() + " 原因: " + e.getMessage());
|
||||
}
|
||||
// 调用你的文件处理逻辑,将文件内容与 document 信息一起构建索引
|
||||
try {
|
||||
processFileWithDocumentInfo(file, document);
|
||||
} catch (Exception e) {
|
||||
log.error("documentId=" + document.getId() + " 索引构建失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// // 递归遍历目录
|
||||
// private void importDirectory(File dir) throws IOException {
|
||||
// File[] files = dir.listFiles();
|
||||
// if (files == null) return;
|
||||
//
|
||||
// for (File file : files) {
|
||||
// if (file.isDirectory()) {
|
||||
// importDirectory(file);
|
||||
// } else if (file.isFile()) {
|
||||
// processFile(file);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// // 处理单个文件的核心逻辑
|
||||
// private void processFile(File file,Document document) throws IOException {
|
||||
// try {
|
||||
// // 检查索引中是否已存在该文件
|
||||
// if (knowledgeService.existsDoc(file.getAbsolutePath())) {
|
||||
// System.out.println("文件已存在索引,跳过: " + file.getAbsolutePath());
|
||||
// return;
|
||||
// }
|
||||
//
|
||||
// String content = EsFileParser.parseFile(file);
|
||||
// String fileName = file.getName();
|
||||
//
|
||||
// // 从 Dify 查找数据集
|
||||
// List<Document> documents = difyDocumentsService.getDataset(fileName);
|
||||
// List<String> datasetIds = documents.stream()
|
||||
// .filter(Document::isEnabled)
|
||||
// .map(Document::getDatasetId)
|
||||
// .collect(Collectors.toList());
|
||||
//
|
||||
// if (datasetIds == null || datasetIds.isEmpty()) {
|
||||
// System.out.println("文件未关联数据集,跳过: " + file.getAbsolutePath());
|
||||
// return;
|
||||
// }
|
||||
//
|
||||
// UUID uploadFileId = documents.get(0).getDataSourceInfoObj().getUpload_file_id();
|
||||
// Document.DataSourceInfo dataSourceInfo = difyDocumentsService.getFileSourceUrl(uploadFileId);
|
||||
//
|
||||
// createAndSaveKnowledgeDoc(file, content, datasetIds, uploadFileId, dataSourceInfo,document);
|
||||
// System.out.println("导入成功: " + file.getAbsolutePath());
|
||||
// } catch (Exception e) {
|
||||
// System.err.println("导入失败: " + file.getAbsolutePath() + " 原因: " + e.getMessage());
|
||||
// }
|
||||
// }
|
||||
|
||||
// 使用已知的document信息处理文件(避免重复查询)
|
||||
private void processFileWithDocumentInfo(File file, Document document) throws IOException {
|
||||
try {
|
||||
try {
|
||||
knowledgeService.createIndex();
|
||||
log.info("创建索引成功");
|
||||
} catch (IOException e) {
|
||||
log.error("创建索引失败", e);
|
||||
}
|
||||
|
||||
// 检查索引中是否已存在该文件
|
||||
if (knowledgeService.existsDoc(file.getAbsolutePath())) {
|
||||
log.info("文件已存在索引,跳过: " + file.getAbsolutePath());
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
String content = EsFileParser.parseFile(file);
|
||||
Document.DataSourceInfo dataSourceInfo = document.getDataSourceInfoObj();
|
||||
|
||||
// 直接从document获取数据集信息 - 使用Arrays.asList替代List.of
|
||||
List<String> datasetIds = Arrays.asList(document.getDatasetId());
|
||||
|
||||
createAndSaveKnowledgeDoc(file, content, datasetIds, dataSourceInfo,document);
|
||||
log.info("导入成功: " + file.getAbsolutePath());
|
||||
} catch (Exception e) {
|
||||
log.error("导入失败: " + file.getAbsolutePath() + " 原因: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
// 创建并保存KnowledgeDoc(提取公共逻辑)
|
||||
private void createAndSaveKnowledgeDoc(File file, String content, List<String> datasetIds,
|
||||
Document.DataSourceInfo dataSourceInfo,Document document) throws IOException {
|
||||
String key = dataSourceInfo.getKey();
|
||||
|
||||
KnowledgeDoc doc = new KnowledgeDoc();
|
||||
doc.setId(UUID.randomUUID().toString());
|
||||
doc.setTitle(document.getName());
|
||||
doc.setContent(content);
|
||||
doc.setFileName(file.getName());
|
||||
doc.setFilePath(indexPath+key);
|
||||
doc.setDataset_id(datasetIds);
|
||||
doc.setKey(key);
|
||||
knowledgeService.addDoc(doc);
|
||||
}
|
||||
|
||||
// 获取文件扩展名
|
||||
private String getFileExtension(String fileName) {
|
||||
int lastDotIndex = fileName.lastIndexOf(".");
|
||||
return lastDotIndex > 0 ? fileName.substring(lastDotIndex + 1) : "";
|
||||
}
|
||||
}
|
|
@ -63,7 +63,7 @@ pdf:
|
|||
|
||||
#es检索服务配置
|
||||
elasticsearch:
|
||||
host: localhost
|
||||
host: 192.168.1.211
|
||||
port: 9200
|
||||
scheme: http
|
||||
#存储文件路径
|
||||
|
|
|
@ -64,10 +64,10 @@ pdf:
|
|||
|
||||
#es检索服务配置
|
||||
elasticsearch:
|
||||
host: localhost
|
||||
host: 192.168.8.253
|
||||
port: 9200
|
||||
scheme: http
|
||||
#存储文件路径
|
||||
dirPath: C:\\Users\\ASUS\\Desktop\\data
|
||||
dirPath: C:/Users/ASUS/Desktop/data/
|
||||
#是否删除索引,重新构建索引
|
||||
deleteIndex: false
|
||||
|
|
|
@ -14,4 +14,21 @@
|
|||
WHERE id = #{UploadFileId}
|
||||
</select>
|
||||
|
||||
<select id="getUploadFileById" resultType="com.bjtds.brichat.entity.dify.Document$DataSourceInfo">
|
||||
SELECT *
|
||||
FROM upload_files
|
||||
WHERE id = #{documentId}
|
||||
</select>
|
||||
|
||||
<select id="getDocumentById" resultType="com.bjtds.brichat.entity.dify.Document">
|
||||
SELECT *
|
||||
FROM documents
|
||||
WHERE id = #{documentId}
|
||||
</select>
|
||||
|
||||
<select id="getDocuments" resultType="com.bjtds.brichat.entity.dify.Document">
|
||||
SELECT *
|
||||
FROM documents
|
||||
</select>
|
||||
|
||||
</mapper>
|
Loading…
Reference in New Issue