Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions docs/docs/multimodal-table/global-index/hybrid-search.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,11 @@ The third argument is an array of full-text route configs created by `named_stru
| `query` | Yes | N/A | LanceDB-style full-text query JSON for this route. |
| `limit` | No | Final limit | Top K results to retrieve from this text column before ranking. |
| `weight` | No | `1.0` | Weight for this route when ranking results. |
| `options` | No | Empty map | Route-specific full-text search options. |
| `options` | No | Empty map | Reserved for future full-text search options. Only an empty map is accepted. |

Within each route array, every `named_struct` should use the same fields because
Spark requires array elements to have the same struct type.
Spark requires array elements to have the same struct type. Full-text route `options` is
currently a reserved field; pass `map()` when the field is needed for struct-type consistency.

Use route `limit` values larger than the final limit when each route should contribute enough
candidates for ranking. For example, with a final limit of `10`, route limits such as `50` or `100`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,19 @@ public static HybridSearchRoute vector(

/**
 * Creates a full-text route from a query given as JSON.
 *
 * <p>The options are validated before the query JSON is parsed, and the first column reported
 * by the parsed query is used as the route's field name.
 *
 * @param queryJson full-text query JSON, parsed with {@code FullTextQuery.fromJson}
 * @param limit limit passed through to the route
 * @param weight weight passed through to the route
 * @param options route options; must be {@code null} or empty
 * @throws IllegalArgumentException if {@code options} is non-empty
 */
public static HybridSearchRoute fullText(
        String queryJson, int limit, float weight, Map<String, String> options) {
    // Validate first so unsupported options are reported before any parsing happens.
    checkFullTextOptions(options);

    FullTextQuery parsedQuery = FullTextQuery.fromJson(queryJson);
    String fieldName = parsedQuery.columns().get(0);
    return new HybridSearchRoute(
            RouteType.FULL_TEXT, fieldName, null, parsedQuery, limit, weight, options);
}

/**
 * Rejects any non-empty options map for a full-text route.
 *
 * @param options route options; {@code null} and empty maps are accepted
 * @throws IllegalArgumentException if {@code options} contains at least one entry
 */
private static void checkFullTextOptions(Map<String, String> options) {
    boolean noOptions = options == null || options.isEmpty();
    if (noOptions) {
        return;
    }
    throw new IllegalArgumentException("Full-text hybrid route options are not supported yet.");
}

private HybridSearchRoute(
RouteType routeType,
String fieldName,
Expand Down Expand Up @@ -239,6 +247,7 @@ public Builder options(Map<String, String> options) {

public HybridSearchRoute build() {
if (fullTextQuery != null) {
checkFullTextOptions(options);
return new HybridSearchRoute(
RouteType.FULL_TEXT,
fullTextQuery.columns().get(0),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.Collections;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

/** Tests for {@link FullTextQuery}. */
public class FullTextQueryTest {
Expand Down Expand Up @@ -172,4 +173,30 @@ public void testFullTextSearchKeepsStructuredQueryJson() {
"{\"match_phrase\":{\"column\":\"content\",\"terms\":\"paimon lake\","
+ "\"slop\":1}}");
}

/** Both the static factory and the builder must reject non-empty full-text route options. */
@Test
public void testHybridFullTextRouteRejectsOptions() {
    String queryJson = "{\"match\":{\"column\":\"content\",\"terms\":\"paimon lake\"}}";
    String expectedMessage = "Full-text hybrid route options are not supported yet";

    // Static factory path.
    assertThatThrownBy(
                    () ->
                            HybridSearchRoute.fullText(
                                    queryJson,
                                    10,
                                    1.0f,
                                    Collections.singletonMap("some.option", "x")))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining(expectedMessage);

    // Builder path.
    assertThatThrownBy(
                    () ->
                            HybridSearchRoute.builder()
                                    .query(queryJson)
                                    .limit(10)
                                    .option("some.option", "x")
                                    .build())
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining(expectedMessage);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ default HybridSearchBuilder addFullTextRoute(String queryJson, int limit, float
return addFullTextRoute(queryJson, limit, weight, null);
}

/** Add a full-text-search route. */
/** Add a full-text-search route. Non-empty options are not supported yet. */
default HybridSearchBuilder addFullTextRoute(
String queryJson, int limit, float weight, Map<String, String> options) {
return addRoute(HybridSearchRoute.fullText(queryJson, limit, weight, options));
Expand Down
9 changes: 9 additions & 0 deletions paimon-python/pypaimon/table/source/hybrid_search_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@
_RRF_K = 60.0


def _check_full_text_options(options: Dict[str, str]):
if options:
raise ValueError(
"Full-text hybrid route options are not supported yet.")


def _normalize_ranker(ranker: Optional[str]) -> str:
if ranker is None or not ranker.strip():
return RRF_RANKER
Expand Down Expand Up @@ -74,6 +80,8 @@ def __post_init__(self):
if self.weight <= 0:
raise ValueError("Weight must be positive, got: %s" % self.weight)
self.options = dict(self.options or {})
if self.route_type == self.FULL_TEXT:
_check_full_text_options(self.options)

@classmethod
def vector_route(
Expand All @@ -99,6 +107,7 @@ def full_text_route(
limit: int,
weight: float = 1.0,
options: Optional[Dict[str, str]] = None) -> 'HybridSearchRoute':
_check_full_text_options(options or {})
query = FullTextQuery.from_json(query_json)
return cls(
route_type=cls.FULL_TEXT,
Expand Down
17 changes: 17 additions & 0 deletions paimon-python/pypaimon/tests/vector_search_filter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1968,6 +1968,23 @@ def test_hybrid_search_rejects_data_filter_with_full_text_route(self):
builder.route_builders()
self.assertIn("full-text routes", str(ctx.exception))

def test_hybrid_search_rejects_full_text_route_options(self):
    # A full-text route carrying any option must be rejected with a ValueError.
    from pypaimon.table.source.hybrid_search_builder import (
        HybridSearchBuilderImpl,
    )

    fields = [_field(0, "id"), _field(1, "content", "STRING")]
    stub_table = _StubTable(fields=fields, entries=[])
    query_json = '{"match":{"column":"content","terms":"paimon search"}}'

    with self.assertRaises(ValueError) as ctx:
        # Builder construction stays inside the assertion scope on purpose.
        builder = HybridSearchBuilderImpl(stub_table)
        builder.add_full_text_route(
            query_json,
            10,
            options={"some.option": "x"})

    message = str(ctx.exception)
    self.assertIn(
        "Full-text hybrid route options are not supported yet", message)

def test_hybrid_search_partition_filter_prunes_full_text_route(self):
from pypaimon.table.source.hybrid_search_builder import (
HybridSearchBuilderImpl,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ case class VectorSearchQuery(override val args: Seq[Expression])
* Usage: hybrid_search(table_name, vector_routes, full_text_routes, limit[, ranker])
* - table_name: the Paimon table to search
* - vector_routes: route config array with field, query_vector, limit, weight, and options fields
* - full_text_routes: route config array with query, limit, weight, and options fields
* - full_text_routes: route config array with query, limit, weight, and options fields; options
*   is reserved and must be an empty map
* - limit: the final number of ranked top results to return
* - ranker: optional ranker for combining results from multiple routes
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,29 @@ class VectorSearchQueryTest extends AnyFunSuite {
assert(search.routes().get(0).weight() == 1.5f)
}

// A full-text route whose `options` map has an entry must fail with IllegalArgumentException.
test("reject hybrid full-text route with non-empty options") {
  val exception = intercept[IllegalArgumentException] {
    // Single full-text route carrying one (unsupported) option entry.
    val routeWithOptions = CreateNamedStruct(
      Seq(
        Literal("query"),
        Literal("""{"match":{"column":"content","terms":"paimon lake"}}"""),
        Literal("options"),
        CreateMap(Seq(Literal("some.option"), Literal("x")))
      ))
    val vectorRoutes = CreateArray(Seq.empty)
    val fullTextRoutes = CreateArray(Seq(routeWithOptions))

    HybridSearchQuery(Seq.empty).createHybridSearch(
      innerTable,
      Seq(vectorRoutes, fullTextRoutes, Literal(5)))
  }

  val message = exception.getMessage
  assert(message.contains("Full-text hybrid route options are not supported yet"))
}

test("create full-text search") {
val search = FullTextSearchQuery(Seq.empty).createFullTextSearch(
innerTable,
Expand Down
Loading