优化了 ES 索引评分:结果与关键字的相关度更高,并会随关键字的数量进行匹配;新增:删除知识库文件时,同步删除该文件对应的索引
This commit is contained in:
parent
fbad0df5de
commit
e48671fb48
|
|
@ -882,6 +882,7 @@ if (!newVal) {
|
|||
import { useUserStore } from '@/store/modules/user'
|
||||
import { useAclStore } from '@/store/modules/acl'
|
||||
import vab from '~/library/plugins/vab'
|
||||
import {log} from "echarts/types/src/util/log";
|
||||
|
||||
// 事件处理函数
|
||||
const renderedHandler = () => {
|
||||
|
|
@ -1225,7 +1226,13 @@ try {
|
|||
|
||||
// 调用删除接口
|
||||
await deleteDocument(parseInt(row.id))
|
||||
|
||||
try{
|
||||
//删除es索引
|
||||
await deleteFileIndex(row.difyDocId,datasetId.value)
|
||||
console.log("删除es索引成功,文档id:{}",row.difyDocId)
|
||||
} catch (error) {
|
||||
console.error("删除es索引失败,文档id:{}",row.difyDocId,error)
|
||||
}
|
||||
|
||||
// 成功处理
|
||||
ElNotification({
|
||||
|
|
|
|||
|
|
@ -70,6 +70,14 @@ public class DatasetManageController {
|
|||
datasetManagerService.deleteDataset(id);
|
||||
|
||||
difyDatasetService.delete(id);
|
||||
|
||||
//删除es索引
|
||||
try {
|
||||
esTDatasetFilesService.deleteIndex(id);
|
||||
log.info("删除es索引成功,知识库id:{}",id);
|
||||
} catch (IOException e) {
|
||||
log.error("删除es索引失败,知识库id:{}",id,e);
|
||||
}
|
||||
}
|
||||
|
||||
@PostMapping("/create")
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService {
|
|||
private ElasticsearchClient client;
|
||||
@Autowired
|
||||
private TUserDatasetMapper tUserDatasetMapper;
|
||||
|
||||
@Autowired
|
||||
private DifyDatasetsMapper difyDatasetsMapper;
|
||||
|
||||
|
|
@ -73,6 +74,7 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService {
|
|||
.properties("source_url", p -> p.keyword(k -> k))
|
||||
.properties("dataset_name", p -> p.keyword(k -> k))
|
||||
.properties("document_id", p -> p.keyword(k -> k))
|
||||
.properties("difyDocId", p -> p.keyword(k -> k))
|
||||
|
||||
)
|
||||
);
|
||||
|
|
@ -263,12 +265,15 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService {
|
|||
double epsilon = 1e-6;
|
||||
double lower = 0.05, upper = 0.98;
|
||||
|
||||
Random random = new Random();
|
||||
int index = 0;
|
||||
for (Hit<TDatasetFiles> hit : response.hits().hits()) {
|
||||
TDatasetFiles d = hit.source();
|
||||
double rawScore = scores.get(index++);
|
||||
double normalizedScore = (maxScore - minScore < epsilon) ? upper
|
||||
: lower + (rawScore - minScore) / (maxScore - minScore) * (upper - lower);
|
||||
log.info("Score: {}", rawScore);
|
||||
double normalizedScore = normalizeScore(rawScore, minScore, maxScore, lower, upper, epsilon, random);
|
||||
|
||||
// 高亮内容
|
||||
|
||||
String content = String.join(" ... ", hit.highlight().getOrDefault("content", Collections.emptyList()));
|
||||
System.out.println("content: " + content);
|
||||
|
|
@ -340,11 +345,11 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService {
|
|||
try {
|
||||
DeleteByQueryResponse response = client.deleteByQuery(d -> d
|
||||
.index(DatasetId)
|
||||
.query(q -> q.term(t -> t.field("document_id").value(documentId))));
|
||||
datasetFilesService.updateByDatasetIdAndDocId(DatasetId, documentId);
|
||||
.query(q -> q.term(t -> t.field("difyDocId").value(documentId))));
|
||||
log.info("删除文档索引成功: {}, documentId: {}", DatasetId, documentId);
|
||||
log.info(" deleted: {}", response.deleted());
|
||||
} catch (IOException e) {
|
||||
log.error("删除文档索引失败: {}, documentId: {}", DatasetId, documentId, e);
|
||||
log.error("删除文档索引失败: {}, documentId: {}, error: {}", DatasetId, documentId, e.getMessage());
|
||||
}
|
||||
} else {
|
||||
log.warn("索引不存在: {}", DatasetId);
|
||||
|
|
@ -365,6 +370,33 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService {
|
|||
});
|
||||
}
|
||||
|
||||
private double normalizeScore(double rawScore, double minScore, double maxScore, double lower, double upper, double epsilon, Random random) {
|
||||
// 计算分数范围
|
||||
double scoreRange = maxScore - minScore;
|
||||
log.warn("Score range: {}", scoreRange);
|
||||
|
||||
// 如果最大最小分数相差小于 epsilon,直接使用 upper
|
||||
double normalizedScore = (scoreRange < epsilon) ? upper
|
||||
: lower + (rawScore - minScore) / scoreRange * (upper - lower);
|
||||
|
||||
log.warn("rawScore: {}, normalizedScore before fluctuation: {}", rawScore, normalizedScore);
|
||||
|
||||
// 获取原始分数的小数点后两位
|
||||
double integerPart = Math.floor(rawScore); // 获取整数部分
|
||||
double decimalPart = rawScore - integerPart; // 获取小数部分
|
||||
double decimalPartOneDigit = Math.floor(decimalPart * 10) / 10.0; // 获取小数部分的第一位
|
||||
double result = integerPart + decimalPartOneDigit; // 将整数部分和小数点后一位合成
|
||||
double decimalPartTwoDigits = (result*10)/ 1000; // 获取小数部分的两位
|
||||
log.warn("Raw score decimal part (2 digits): {}", decimalPartTwoDigits);
|
||||
if(normalizedScore==upper){
|
||||
normalizedScore -= (0.1-decimalPartTwoDigits);
|
||||
}else if(normalizedScore==lower){
|
||||
normalizedScore += decimalPartTwoDigits;
|
||||
}
|
||||
return normalizedScore;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue