Merge branch 'es'

This commit is contained in:
wenjinbo 2025-09-28 08:08:37 +08:00
commit 0d80efd8e2
1 changed file with 55 additions and 7 deletions

View File

@@ -27,6 +27,7 @@ import java.io.IOException;
import java.util.*; import java.util.*;
import java.util.concurrent.*; import java.util.concurrent.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import co.elastic.clients.elasticsearch._types.query_dsl.Operator;
@Service @Service
@@ -69,7 +70,7 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService {
.mappings(m -> m .mappings(m -> m
.dynamic(DynamicMapping.False) .dynamic(DynamicMapping.False)
.properties("name", p -> p.text(t -> t.analyzer("ik_max_word").fields("keyword", f -> f.keyword(k -> k)))) .properties("name", p -> p.text(t -> t.analyzer("ik_max_word").fields("keyword", f -> f.keyword(k -> k))))
.properties("content", p -> p.text(t -> t.analyzer("ik_max_word").searchAnalyzer("ik_max_word"))) .properties("content", p -> p.text(t -> t.analyzer("ik_max_word")))
.properties("dataset_id", p -> p.keyword(k -> k)) .properties("dataset_id", p -> p.keyword(k -> k))
.properties("source_url", p -> p.keyword(k -> k)) .properties("source_url", p -> p.keyword(k -> k))
.properties("dataset_name", p -> p.keyword(k -> k)) .properties("dataset_name", p -> p.keyword(k -> k))
@@ -279,11 +280,47 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService {
SearchResponse<TDatasetFiles> response = client.search(s -> s SearchResponse<TDatasetFiles> response = client.search(s -> s
.index(datasetId) .index(datasetId)
.query(q -> q.bool(b -> b .query(q -> q.bool(b -> b
.should(s1 -> s1.match(m -> m.field("name").query(keyword).analyzer("ik_smart"))) // 1 name 字段分词查询粗粒度
.should(s2 -> s2.match(m -> m.field("content").query(keyword).analyzer("ik_smart"))) .should(s1 -> s1.match(m -> m
.should(s3 -> s3.match(m -> m.field("name").query(keyword).analyzer("ik_max_word"))) .field("name")
.should(s4 -> s4.match(m -> m.field("content").query(keyword).analyzer("ik_max_word"))) .query(keyword)
.should(s5 -> s5.term(t -> t.field("name.keyword").value(keyword))) .analyzer("ik_smart")
.boost(4.0f)
))
// 2 name 字段分词查询细粒度
.should(s2 -> s2.match(m -> m
.field("name")
.query(keyword)
.analyzer("ik_max_word")
.boost(3.0f)
))
// 3 name 字段 operator:AND 精确匹配每个分词
.should(s3 -> s3.match(m -> m
.field("name")
.query(keyword)
.operator(Operator.And)
.boost(7.5f)
))
// 4 name.keyword 精确匹配完整字符串
.should(s4 -> s4.term(t -> t
.field("name.keyword")
.value(keyword)
.boost(7.0f)
))
// 5 content 字段分词查询粗粒度
.should(s5 -> s5.match(m -> m
.field("content")
.query(keyword)
.analyzer("ik_smart")
.boost(2.5f)
))
// 6 content 字段分词查询细粒度
.should(s6 -> s6.match(m -> m
.field("content")
.query(keyword)
.analyzer("ik_max_word")
.boost(1.0f)
))
)) ))
.size(500) .size(500)
.highlight(h -> h .highlight(h -> h
@@ -293,6 +330,7 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService {
TDatasetFiles.class TDatasetFiles.class
); );
Map<String, RecordDto> uniqueResults = new LinkedHashMap<>(); Map<String, RecordDto> uniqueResults = new LinkedHashMap<>();
for (Hit<TDatasetFiles> hit : response.hits().hits()) { for (Hit<TDatasetFiles> hit : response.hits().hits()) {
TDatasetFiles d = hit.source(); TDatasetFiles d = hit.source();
@@ -421,7 +459,17 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService {
// 限制不要超过 upper // 限制不要超过 upper
if(normalizedScore > 0.99){ if(normalizedScore > 0.99){
normalizedScore = 0.99; double min = 0.98;
double max = 0.99;
// rawScore 归一化到 [0,1]
double factor = Math.min(rawScore / maxScore+1, 1.0);
// 根据 factor 映射到 [0.98, 0.99) 并加微小随机浮动
normalizedScore = min + factor * (max - min)
+ ThreadLocalRandom.current().nextDouble(0, 0.001);
// 确保不超过 0.9999
normalizedScore = Math.min(normalizedScore, 0.9999);
} }
log.warn("Raw score: {}, normalized score with influence: {}", rawScore, normalizedScore); log.warn("Raw score: {}, normalized score with influence: {}", rawScore, normalizedScore);