From d2804dc013eaa3fb25a24cbda0a98f579725731e Mon Sep 17 00:00:00 2001 From: moon <2623532423@qq.com> Date: Fri, 26 Sep 2025 15:47:05 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86=E5=88=86=E6=95=B0?= =?UTF-8?q?=E7=AE=97=E6=B3=95=EF=BC=8C=E4=BD=BF=E5=BE=97=E7=BD=AE=E4=BF=A1?= =?UTF-8?q?=E5=BA=A6=E4=BC=9A=E6=9B=B4=E5=8A=A0=E8=B4=B4=E5=90=88=E5=AE=9E?= =?UTF-8?q?=E9=99=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../impl/EsTDatasetFilesServiceImpl.java | 62 ++++++++++++++++--- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/chat-server/src/main/java/com/bjtds/brichat/service/impl/EsTDatasetFilesServiceImpl.java b/chat-server/src/main/java/com/bjtds/brichat/service/impl/EsTDatasetFilesServiceImpl.java index 9c91fb2..41d9787 100644 --- a/chat-server/src/main/java/com/bjtds/brichat/service/impl/EsTDatasetFilesServiceImpl.java +++ b/chat-server/src/main/java/com/bjtds/brichat/service/impl/EsTDatasetFilesServiceImpl.java @@ -27,6 +27,7 @@ import java.io.IOException; import java.util.*; import java.util.concurrent.*; import java.util.stream.Collectors; +import co.elastic.clients.elasticsearch._types.query_dsl.Operator; @Service @@ -69,7 +70,7 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService { .mappings(m -> m .dynamic(DynamicMapping.False) .properties("name", p -> p.text(t -> t.analyzer("ik_max_word").fields("keyword", f -> f.keyword(k -> k)))) - .properties("content", p -> p.text(t -> t.analyzer("ik_max_word").searchAnalyzer("ik_max_word"))) + .properties("content", p -> p.text(t -> t.analyzer("ik_max_word"))) .properties("dataset_id", p -> p.keyword(k -> k)) .properties("source_url", p -> p.keyword(k -> k)) .properties("dataset_name", p -> p.keyword(k -> k)) @@ -279,11 +280,47 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService { SearchResponse response = client.search(s -> s .index(datasetId) .query(q -> q.bool(b -> b - .should(s1 -> 
s1.match(m -> m.field("name").query(keyword).analyzer("ik_smart"))) - .should(s2 -> s2.match(m -> m.field("content").query(keyword).analyzer("ik_smart"))) - .should(s3 -> s3.match(m -> m.field("name").query(keyword).analyzer("ik_max_word"))) - .should(s4 -> s4.match(m -> m.field("content").query(keyword).analyzer("ik_max_word"))) - .should(s5 -> s5.term(t -> t.field("name.keyword").value(keyword))) + // 1) name field, analyzed match (coarse-grained, ik_smart) + .should(s1 -> s1.match(m -> m + .field("name") + .query(keyword) + .analyzer("ik_smart") + .boost(4.0f) + )) + // 2) name field, analyzed match (fine-grained, ik_max_word) + .should(s2 -> s2.match(m -> m + .field("name") + .query(keyword) + .analyzer("ik_max_word") + .boost(3.0f) + )) + // 3) name field with operator AND: every analyzed term must match + .should(s3 -> s3.match(m -> m + .field("name") + .query(keyword) + .operator(Operator.And) + .boost(7.5f) + )) + // 4) name.keyword: exact match on the complete string + .should(s4 -> s4.term(t -> t + .field("name.keyword") + .value(keyword) + .boost(7.0f) + )) + // 5) content field, analyzed match (coarse-grained, ik_smart) + .should(s5 -> s5.match(m -> m + .field("content") + .query(keyword) + .analyzer("ik_smart") + .boost(2.5f) + )) + // 6) content field, analyzed match (fine-grained, ik_max_word) + .should(s6 -> s6.match(m -> m + .field("content") + .query(keyword) + .analyzer("ik_max_word") + .boost(1.0f) + )) )) .size(500) .highlight(h -> h @@ -293,6 +330,7 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService { TDatasetFiles.class ); + Map uniqueResults = new LinkedHashMap<>(); for (Hit hit : response.hits().hits()) { TDatasetFiles d = hit.source(); @@ -421,7 +459,17 @@ public class EsTDatasetFilesServiceImpl implements EsTDatasetFilesService { // 限制不要超过 upper if(normalizedScore > 0.99){ - normalizedScore = 0.99; + double min = 0.98; + double max = 0.99; + // Normalize rawScore into [0,1]; the parentheses matter, since "rawScore / maxScore + 1" would add 1 after the division + double factor = Math.min(rawScore / (maxScore + 1), 1.0); + + // Map factor onto [0.98, 0.99) and add a small random jitter + normalizedScore = min + factor * (max - min) + + ThreadLocalRandom.current().nextDouble(0, 0.001); + + // Keep the result at or below 0.9999 + normalizedScore = 
Math.min(normalizedScore, 0.9999); } log.warn("Raw score: {}, normalized score with influence: {}", rawScore, normalizedScore);