Merge branch 'es检索'
# Conflicts: # chat-client/src/views/datasets/components/DocumentList.vue
This commit is contained in:
commit
f247efbb79
|
|
@ -9,6 +9,7 @@ import com.bjtds.brichat.entity.dto.PdfTaskDto;
|
||||||
import com.bjtds.brichat.service.DatasetFilesService;
|
import com.bjtds.brichat.service.DatasetFilesService;
|
||||||
import com.bjtds.brichat.service.DatasetsDocService;
|
import com.bjtds.brichat.service.DatasetsDocService;
|
||||||
import com.bjtds.brichat.service.DeepAnalysisQueueService;
|
import com.bjtds.brichat.service.DeepAnalysisQueueService;
|
||||||
|
import com.bjtds.brichat.service.EsTDatasetFilesService;
|
||||||
import com.bjtds.brichat.service.dify.DifyDatasetApiService;
|
import com.bjtds.brichat.service.dify.DifyDatasetApiService;
|
||||||
import com.bjtds.brichat.util.Constants;
|
import com.bjtds.brichat.util.Constants;
|
||||||
import com.bjtds.brichat.util.ResultUtils;
|
import com.bjtds.brichat.util.ResultUtils;
|
||||||
|
|
@ -27,6 +28,7 @@ import org.springframework.web.bind.annotation.*;
|
||||||
import org.springframework.web.client.RestTemplate;
|
import org.springframework.web.client.RestTemplate;
|
||||||
import org.springframework.web.multipart.MultipartFile;
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
import javax.annotation.Resource;
|
import javax.annotation.Resource;
|
||||||
|
import java.io.IOException;
|
||||||
import java.net.URLEncoder;
|
import java.net.URLEncoder;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
@ -70,6 +72,9 @@ public class DatasetDocController {
|
||||||
@Resource
|
@Resource
|
||||||
private DeepAnalysisQueueService deepAnalysisQueueService;
|
private DeepAnalysisQueueService deepAnalysisQueueService;
|
||||||
|
|
||||||
|
@Resource
|
||||||
|
private EsTDatasetFilesService esTDatasetFilesService;
|
||||||
|
|
||||||
|
|
||||||
@Value("${dify.url}")
|
@Value("${dify.url}")
|
||||||
private String difyUrl;
|
private String difyUrl;
|
||||||
|
|
@ -137,9 +142,14 @@ public class DatasetDocController {
|
||||||
for (TDatasetFiles tdatasetFiles : allFilesByParentId) {
|
for (TDatasetFiles tdatasetFiles : allFilesByParentId) {
|
||||||
if(tdatasetFiles.getDifyDocId() != null && !tdatasetFiles.getDifyDocId().isEmpty()){
|
if(tdatasetFiles.getDifyDocId() != null && !tdatasetFiles.getDifyDocId().isEmpty()){
|
||||||
difyDatasetService.deleteDocument(tdatasetFiles.getDifyDatasetId(),tdatasetFiles.getDifyDocId());
|
difyDatasetService.deleteDocument(tdatasetFiles.getDifyDatasetId(),tdatasetFiles.getDifyDocId());
|
||||||
|
//删除es索引
|
||||||
|
try{
|
||||||
|
esTDatasetFilesService.deleteDocIndex(tdatasetFiles.getDifyDatasetId(),tdatasetFiles.getDifyDocId());
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.error("删除es索引失败,文档id:{}",tdatasetFiles.getDifyDocId(),e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
datasetFilesService.physicalDeleteFile(tdatasetFiles.getId());
|
datasetFilesService.physicalDeleteFile(tdatasetFiles.getId());
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -147,7 +157,14 @@ public class DatasetDocController {
|
||||||
datasetFilesService.deleteFile(fileId);
|
datasetFilesService.deleteFile(fileId);
|
||||||
} else{
|
} else{
|
||||||
if(fileInfo.getDifyDocId() != null && !fileInfo.getDifyDocId().isEmpty()){
|
if(fileInfo.getDifyDocId() != null && !fileInfo.getDifyDocId().isEmpty()){
|
||||||
|
|
||||||
difyDatasetService.deleteDocument(fileInfo.getDifyDatasetId(),fileInfo.getDifyDocId());
|
difyDatasetService.deleteDocument(fileInfo.getDifyDatasetId(),fileInfo.getDifyDocId());
|
||||||
|
//删除es索引
|
||||||
|
try{
|
||||||
|
esTDatasetFilesService.deleteDocIndex(fileInfo.getDifyDatasetId(),fileInfo.getDifyDocId());
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.error("删除es索引失败,文档id:{}",fileInfo.getDifyDocId(),e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
datasetFilesService.physicalDeleteFile(fileId);
|
datasetFilesService.physicalDeleteFile(fileId);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -70,6 +70,14 @@ public class DatasetManageController {
|
||||||
datasetManagerService.deleteDataset(id);
|
datasetManagerService.deleteDataset(id);
|
||||||
|
|
||||||
difyDatasetService.delete(id);
|
difyDatasetService.delete(id);
|
||||||
|
|
||||||
|
//删除es索引
|
||||||
|
try {
|
||||||
|
esTDatasetFilesService.deleteIndex(id);
|
||||||
|
log.info("删除es索引成功,知识库id:{}",id);
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.error("删除es索引失败,知识库id:{}",id,e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@PostMapping("/create")
|
@PostMapping("/create")
|
||||||
|
|
|
||||||
|
|
@ -141,16 +141,16 @@ public class KnowledgeBaseController {
|
||||||
result.put("finished", finished);
|
result.put("finished", finished);
|
||||||
return ResultUtils.success(result);
|
return ResultUtils.success(result);
|
||||||
}
|
}
|
||||||
@ApiOperation("删除索引下的文件")
|
// @ApiOperation("删除索引下的文件")
|
||||||
@DeleteMapping("/deleteFileIndex")
|
// @DeleteMapping("/deleteFileIndex")
|
||||||
public ResultUtils deleteFileIndex(@RequestParam("documentId") String documentId,@RequestParam("datasetId")String datasetId) throws Exception {
|
// public ResultUtils deleteFileIndex(@RequestParam("documentId") String documentId,@RequestParam("datasetId")String datasetId) throws Exception {
|
||||||
try{
|
// try{
|
||||||
esTDatasetFilesService.deleteDocIndex(datasetId,documentId);
|
// esTDatasetFilesService.deleteDocIndex(datasetId,documentId);
|
||||||
return ResultUtils.success("索引删除成功");
|
// return ResultUtils.success("索引删除成功");
|
||||||
} catch (IOException e) {
|
// } catch (IOException e) {
|
||||||
return ResultUtils.error("索引删除失败: " + e.getMessage());
|
// return ResultUtils.error("索引删除失败: " + e.getMessage());
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
@ApiOperation("删除所有索引")
|
@ApiOperation("删除所有索引")
|
||||||
@DeleteMapping("/deleteAllIndex")
|
@DeleteMapping("/deleteAllIndex")
|
||||||
public ResultUtils deleteAllIndex() throws Exception {
|
public ResultUtils deleteAllIndex() throws Exception {
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,7 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService {
|
||||||
private ElasticsearchClient client;
|
private ElasticsearchClient client;
|
||||||
@Autowired
|
@Autowired
|
||||||
private TUserDatasetMapper tUserDatasetMapper;
|
private TUserDatasetMapper tUserDatasetMapper;
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
private DifyDatasetsMapper difyDatasetsMapper;
|
private DifyDatasetsMapper difyDatasetsMapper;
|
||||||
|
|
||||||
|
|
@ -73,6 +74,7 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService {
|
||||||
.properties("source_url", p -> p.keyword(k -> k))
|
.properties("source_url", p -> p.keyword(k -> k))
|
||||||
.properties("dataset_name", p -> p.keyword(k -> k))
|
.properties("dataset_name", p -> p.keyword(k -> k))
|
||||||
.properties("document_id", p -> p.keyword(k -> k))
|
.properties("document_id", p -> p.keyword(k -> k))
|
||||||
|
.properties("difyDocId", p -> p.keyword(k -> k))
|
||||||
|
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
@ -263,12 +265,15 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService {
|
||||||
double epsilon = 1e-6;
|
double epsilon = 1e-6;
|
||||||
double lower = 0.05, upper = 0.98;
|
double lower = 0.05, upper = 0.98;
|
||||||
|
|
||||||
|
Random random = new Random();
|
||||||
int index = 0;
|
int index = 0;
|
||||||
for (Hit<TDatasetFiles> hit : response.hits().hits()) {
|
for (Hit<TDatasetFiles> hit : response.hits().hits()) {
|
||||||
TDatasetFiles d = hit.source();
|
TDatasetFiles d = hit.source();
|
||||||
double rawScore = scores.get(index++);
|
double rawScore = scores.get(index++);
|
||||||
double normalizedScore = (maxScore - minScore < epsilon) ? upper
|
log.info("Score: {}", rawScore);
|
||||||
: lower + (rawScore - minScore) / (maxScore - minScore) * (upper - lower);
|
double normalizedScore = normalizeScore(rawScore, minScore, maxScore, lower, upper, epsilon, random);
|
||||||
|
|
||||||
|
// 高亮内容
|
||||||
|
|
||||||
String content = String.join(" ... ", hit.highlight().getOrDefault("content", Collections.emptyList()));
|
String content = String.join(" ... ", hit.highlight().getOrDefault("content", Collections.emptyList()));
|
||||||
System.out.println("content: " + content);
|
System.out.println("content: " + content);
|
||||||
|
|
@ -340,11 +345,11 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService {
|
||||||
try {
|
try {
|
||||||
DeleteByQueryResponse response = client.deleteByQuery(d -> d
|
DeleteByQueryResponse response = client.deleteByQuery(d -> d
|
||||||
.index(DatasetId)
|
.index(DatasetId)
|
||||||
.query(q -> q.term(t -> t.field("document_id").value(documentId))));
|
.query(q -> q.term(t -> t.field("difyDocId").value(documentId))));
|
||||||
datasetFilesService.updateByDatasetIdAndDocId(DatasetId, documentId);
|
|
||||||
log.info("删除文档索引成功: {}, documentId: {}", DatasetId, documentId);
|
log.info("删除文档索引成功: {}, documentId: {}", DatasetId, documentId);
|
||||||
|
log.info(" deleted: {}", response.deleted());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
log.error("删除文档索引失败: {}, documentId: {}", DatasetId, documentId, e);
|
log.error("删除文档索引失败: {}, documentId: {}, error: {}", DatasetId, documentId, e.getMessage());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log.warn("索引不存在: {}", DatasetId);
|
log.warn("索引不存在: {}", DatasetId);
|
||||||
|
|
@ -365,6 +370,33 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private double normalizeScore(double rawScore, double minScore, double maxScore, double lower, double upper, double epsilon, Random random) {
|
||||||
|
// 计算分数范围
|
||||||
|
double scoreRange = maxScore - minScore;
|
||||||
|
log.warn("Score range: {}", scoreRange);
|
||||||
|
|
||||||
|
// 如果最大最小分数相差小于 epsilon,直接使用 upper
|
||||||
|
double normalizedScore = (scoreRange < epsilon) ? upper
|
||||||
|
: lower + (rawScore - minScore) / scoreRange * (upper - lower);
|
||||||
|
|
||||||
|
log.warn("rawScore: {}, normalizedScore before fluctuation: {}", rawScore, normalizedScore);
|
||||||
|
|
||||||
|
// 获取原始分数的小数点后两位
|
||||||
|
double integerPart = Math.floor(rawScore); // 获取整数部分
|
||||||
|
double decimalPart = rawScore - integerPart; // 获取小数部分
|
||||||
|
double decimalPartOneDigit = Math.floor(decimalPart * 10) / 10.0; // 获取小数部分的第一位
|
||||||
|
double result = integerPart + decimalPartOneDigit; // 将整数部分和小数点后一位合成
|
||||||
|
double decimalPartTwoDigits = (result*10)/ 1000; // 获取小数部分的两位
|
||||||
|
log.warn("Raw score decimal part (2 digits): {}", decimalPartTwoDigits);
|
||||||
|
if(normalizedScore==upper){
|
||||||
|
normalizedScore -= (0.1-decimalPartTwoDigits);
|
||||||
|
}else if(normalizedScore==lower){
|
||||||
|
normalizedScore += decimalPartTwoDigits;
|
||||||
|
}
|
||||||
|
return normalizedScore;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue