From 52df0d629371689658a137f52cf8c6b33d8dd084 Mon Sep 17 00:00:00 2001
From: Kim Gustyr
Date: Fri, 24 Apr 2026 12:56:25 +0100
Subject: [PATCH 1/2] perf: Speed up local-evaluation hot path for large
 environments

Addresses the regression reported in Flagsmith/flagsmith-python-client#198:
for a 262-feature environment with ~10% multivariate features, local
evaluation is ~32% faster (~115 us -> ~78 us per call on an M-series Mac).

Changes:

- Hoist `_get_identity_key` out of the per-feature loop in
  `evaluate_features`. The identity key is invariant across features in a
  single evaluation, so we now resolve it once per `get_evaluation_result`
  call instead of once per feature.
- Inline the per-feature flag-result construction formerly done by
  `get_flag_result_from_context`. The public helper is retained as a thin
  wrapper so existing callers / mocks still work.
- Localise hot-loop references (`hash_fn`, `segment_overrides.get`, the
  variant priority key) so the loop avoids repeated global and attribute
  lookups on every iteration.
- Add a two-key fast path, `get_hashed_percentage_for_object_id_pair`, for
  variant selection and `PERCENTAGE_SPLIT` conditions, skipping the
  list-building and string join the generic helper performs on every call.

Also adds a 262-feature synthetic benchmark alongside the existing
5-feature ones so CodSpeed can catch regressions that only appear at scale.

beep boop
---
 flag_engine/segments/evaluator.py             | 126 ++++++++++-------
 flag_engine/utils/hashing.py                  |  21 ++++
 tests/engine_tests/test_engine.py             |  62 ++++++++-
 .../unit/segments/test_segments_evaluator.py  |  18 ++-
 4 files changed, 168 insertions(+), 59 deletions(-)

diff --git a/flag_engine/segments/evaluator.py b/flag_engine/segments/evaluator.py
index 0b8f8fc9..e717db45 100644
--- a/flag_engine/segments/evaluator.py
+++ b/flag_engine/segments/evaluator.py
@@ -31,7 +31,7 @@
     is_context_value,
 )
 from flag_engine.segments.utils import get_matching_function
-from flag_engine.utils.hashing import get_hashed_percentage_for_object_ids
+from flag_engine.utils.hashing import get_hashed_percentage_for_object_id_pair
 from flag_engine.utils.semver import is_semver
 from flag_engine.utils.types import SupportsStr, get_casting_function

@@ -58,8 +58,9 @@ def get_evaluation_result(
     :return: EvaluationResult containing the context, flags, and segments
     """
     context = get_enriched_context(context)
+    identity_key = _get_identity_key(context)
     segments, segment_overrides = evaluate_segments(context)
-    flags = evaluate_features(context, segment_overrides)
+    flags = evaluate_features(context, segment_overrides, identity_key=identity_key)

     return {
         "flags": flags,
@@ -138,26 +139,60 @@ def evaluate_features(
     context: EvaluationContext[typing.Any, FeatureMetadataT],
     segment_overrides: SegmentOverrides[FeatureMetadataT],
+    *,
+    identity_key: typing.Optional[str] = None,
 ) -> dict[str, FlagResult[FeatureMetadataT]]:
     if not (features := context.get("features")):
         return {}

+    # ``identity_key`` is invariant across all features in a single evaluation.
+    # Resolving it here (or accepting it from the caller) means the per-feature
+    # hot loop below doesn't have to re-walk ``context["identity"]`` N times.
+    if identity_key is None:
+        identity_key = _get_identity_key(context)
+
+    # Localise loop dependencies once so the tight per-feature loop doesn't
+    # chase module globals on every iteration. ``get_flag_result_from_context``
+    # is inlined below for environments with many features (e.g. 250+), where
+    # the function-call overhead is otherwise ~15% of per-call time.
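+    # ``get_hashed_percentage_for_object_id_pair(a, b)`` is documented to
+    # return the same value as ``get_hashed_percentage_for_object_ids([a, b])``,
+    # so the variant selection below matches the generic helper exactly.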
+    hash_fn = get_hashed_percentage_for_object_id_pair
+    overrides_get = segment_overrides.get
     flags: dict[str, FlagResult[FeatureMetadataT]] = {}
+    for feature_name, feature_context in features.items():
+        if segment_override := overrides_get(feature_name):
+            effective_feature_context = segment_override["feature_context"]
+            reason = f"TARGETING_MATCH; segment={segment_override['segment_name']}"
+        else:
+            effective_feature_context = feature_context
+            reason = "DEFAULT"

-    for feature_context in features.values():
-        feature_name = feature_context["name"]
-        if segment_override := segment_overrides.get(feature_name):
-            flags[feature_name] = get_flag_result_from_context(
-                context=context,
-                feature_context=segment_override["feature_context"],
-                reason=f"TARGETING_MATCH; segment={segment_override['segment_name']}",
-            )
-            continue
-        flags[feature_name] = get_flag_result_from_context(
-            context=context,
-            feature_context=context["features"][feature_name],
-            reason="DEFAULT",
-        )
+        value: typing.Any = effective_feature_context["value"]
+        if identity_key is not None and (
+            variants := effective_feature_context.get("variants")
+        ):
+            percentage_value = hash_fn(effective_feature_context["key"], identity_key)
+            start_percentage = 0.0
+            for variant in sorted(variants, key=_variant_priority):
+                limit = (weight := variant["weight"]) + start_percentage
+                if start_percentage <= percentage_value < limit:
+                    value = variant["value"]
+                    reason = f"SPLIT; weight={weight}"
+                    break
+                start_percentage = limit
+
+        flag_result: FlagResult[FeatureMetadataT] = {
+            "enabled": effective_feature_context["enabled"],
+            "name": effective_feature_context["name"],
+            "reason": reason,
+            "value": value,
+        }
+        if metadata := effective_feature_context.get("metadata"):
+            flag_result["metadata"] = metadata
+        flags[feature_name] = flag_result

     return flags
@@ -176,47 +208,42 @@ def get_flag_result_from_context(
     :param reason: reason to use when no variant selected
     :return: the value for the feature name in the evaluation context
     """
-    key = _get_identity_key(context)
+    identity_key = _get_identity_key(context)
+    value: typing.Any = feature_context["value"]

-    flag_result: typing.Optional[FlagResult[FeatureMetadataT]] = None
-
-    if key is not None and (variants := feature_context.get("variants")):
-        percentage_value = get_hashed_percentage_for_object_ids(
-            [feature_context["key"], key]
+    if identity_key is not None and (variants := feature_context.get("variants")):
+        percentage_value = get_hashed_percentage_for_object_id_pair(
+            feature_context["key"], identity_key
         )
-        start_percentage = 0.0
-
-        for variant in sorted(
-            variants,
-            key=operator.itemgetter("priority"),
-        ):
+        start_percentage = 0.0
+        for variant in sorted(variants, key=_variant_priority):
             limit = (weight := variant["weight"]) + start_percentage
             if start_percentage <= percentage_value < limit:
-                flag_result = {
-                    "enabled": feature_context["enabled"],
-                    "name": feature_context["name"],
-                    "reason": f"SPLIT; weight={weight}",
-                    "value": variant["value"],
-                }
+                value = variant["value"]
+                reason = f"SPLIT; weight={weight}"
                 break
-
             start_percentage = limit

-    if flag_result is None:
-        flag_result = {
-            "enabled": feature_context["enabled"],
-            "name": feature_context["name"],
-            "reason": reason,
-            "value": feature_context["value"],
-        }
-
+    flag_result: FlagResult[FeatureMetadataT] = {
+        "enabled": feature_context["enabled"],
+        "name": feature_context["name"],
+        "reason": reason,
+        "value": value,
+    }
     if metadata := feature_context.get("metadata"):
         flag_result["metadata"] = metadata
-
     return flag_result


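+# Typed sort key shared by the inlined hot loop in ``evaluate_features`` and
+# the ``get_flag_result_from_context`` wrapper; unlike the former
+# ``operator.itemgetter("priority")``, it gives mypy a concrete ``int``.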
+def _variant_priority(variant: typing.Mapping[str, typing.Any]) -> int: + priority: int = variant["priority"] + return priority + + def is_context_in_segment( context: _EvaluationContextAnyMeta, segment_context: SegmentContext[typing.Any, typing.Any], @@ -290,14 +313,14 @@ def context_matches_condition( if condition_operator == constants.PERCENTAGE_SPLIT: if context_value is None: return False - - object_ids = [segment_key, context_value] - try: float_value = float(condition["value"]) except ValueError: return False - return get_hashed_percentage_for_object_ids(object_ids) <= float_value + return ( + get_hashed_percentage_for_object_id_pair(segment_key, context_value) + <= float_value + ) if condition_operator == constants.IS_NOT_SET: return context_value is None diff --git a/flag_engine/utils/hashing.py b/flag_engine/utils/hashing.py index c4618e1e..321f63c9 100644 --- a/flag_engine/utils/hashing.py +++ b/flag_engine/utils/hashing.py @@ -31,3 +31,24 @@ def get_hashed_percentage_for_object_ids( ) return value + + +def get_hashed_percentage_for_object_id_pair( + first: SupportsStr, + second: SupportsStr, +) -> float: + """Fast path for the hot two-key case used by variant selection and + ``PERCENTAGE_SPLIT`` conditions. Skips the iterator / list wrapping that + the generic helper performs on every call. + + Returns the same value as + ``get_hashed_percentage_for_object_ids([first, second])``. + """ + to_hash = f"{first},{second}" + hashed_value = hashlib.md5(to_hash.encode("utf-8")) + hashed_value_as_int = int(hashed_value.hexdigest(), base=16) + value = ((hashed_value_as_int % 9999) / 9998) * 100 + if value == 100: + # Extremely unlikely; fall back to the generic recursion-capable path. + return get_hashed_percentage_for_object_ids([first, second], iterations=2) + return value diff --git a/tests/engine_tests/test_engine.py b/tests/engine_tests/test_engine.py index fb98f4ba..c86354de 100644 --- a/tests/engine_tests/test_engine.py +++ b/tests/engine_tests/test_engine.py @@ -6,7 +6,7 @@ import pytest from _pytest.mark import ParameterSet -from flag_engine.context.types import EvaluationContext +from flag_engine.context.types import EvaluationContext, FeatureContext from flag_engine.engine import get_evaluation_result from flag_engine.result.types import EvaluationResult @@ -43,11 +43,66 @@ def _extract_benchmark_contexts( yield pyjson5.loads((test_cases_dir_path / file_path).read_text())["context"] +def _build_large_benchmark_context( + n_features: int = 262, + multivariate_features: int = 26, +) -> EvaluationContext: + """Mirror the scenario from flagsmith-python-client issue #198: a real-world + local-evaluation environment with ~260 features, a handful of which use + multivariate splits, evaluated for a single identity. Small enough to + keep the benchmark fast but large enough to surface per-feature overhead. + """ + features: dict[str, FeatureContext[typing.Any]] = {} + for i in range(n_features): + name = f"feature_{i:04d}" + fc: FeatureContext[typing.Any] = { + "key": str(i + 1), + "name": name, + "enabled": bool(i % 2), + "value": f"value-{i}", + "metadata": {"id": i + 1}, + } + if i < multivariate_features: + # Intentionally reverse-ordered so ``sorted()`` has work to do. 
+            fc["variants"] = [
+                {"value": f"mv-{i}-b", "weight": 40.0, "priority": 2},
+                {"value": f"mv-{i}-a", "weight": 60.0, "priority": 1},
+            ]
+        features[name] = fc
+    return {
+        "environment": {"key": "bench-env", "name": "bench"},
+        "features": features,
+        "segments": {
+            "1": {
+                "key": "1",
+                "name": "bench-segment",
+                "rules": [
+                    {
+                        "type": "ALL",
+                        "conditions": [
+                            {
+                                "property": "venue_id",
+                                "operator": "EQUAL",
+                                "value": "no-match",
+                            }
+                        ],
+                    }
+                ],
+            }
+        },
+        "identity": {
+            "identifier": "anonymous",
+            "traits": {"venue_id": "12345"},
+        },
+    }
+
+
 TEST_CASES = sorted(
     _extract_test_cases(TEST_CASES_PATH),
     key=lambda param: str(param.id),
 )
 BENCHMARK_CONTEXTS = list(_extract_benchmark_contexts(TEST_CASES_PATH))
+LARGE_BENCHMARK_CONTEXT = _build_large_benchmark_context()


 @pytest.mark.parametrize(
@@ -69,3 +124,8 @@ def test_engine(
 def test_engine_benchmark() -> None:
     for context in BENCHMARK_CONTEXTS:
         get_evaluation_result(context)
+
+
+@pytest.mark.benchmark
+def test_engine_benchmark_large_context() -> None:
+    get_evaluation_result(LARGE_BENCHMARK_CONTEXT)
diff --git a/tests/unit/segments/test_segments_evaluator.py b/tests/unit/segments/test_segments_evaluator.py
index 844d4fa2..12a78e6f 100644
--- a/tests/unit/segments/test_segments_evaluator.py
+++ b/tests/unit/segments/test_segments_evaluator.py
@@ -265,7 +265,7 @@ def test_context_in_segment_percentage_split(
     }

     mock_get_hashed_percentage = mocker.patch(
-        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids"
+        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_id_pair"
     )
     mock_get_hashed_percentage.return_value = identity_hashed_percentage

@@ -308,7 +308,7 @@ def test_context_in_segment_percentage_split__no_identity__returns_expected(
     }

     mock_get_hashed_percentage = mocker.patch(
-        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids"
+        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_id_pair"
     )

     # When
@@ -352,7 +352,7 @@ def test_context_in_segment_percentage_split__trait_value__calls_expected(
     }

     mock_get_hashed_percentage = mocker.patch(
-        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids"
+        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_id_pair"
     )
     mock_get_hashed_percentage.return_value = 1

@@ -361,7 +361,7 @@ def test_context_in_segment_percentage_split__trait_value__calls_expected(

     # Then
     mock_get_hashed_percentage.assert_called_once_with(
-        [segment_context["key"], "custom_value"]
+        segment_context["key"], "custom_value"
     )
     assert result

@@ -841,7 +841,7 @@ def test_get_flag_result_from_context__calls_returns_expected(
     # we mock the function which gets the percentage value for an identity to
     # return a deterministic value so we know which value to expect
     get_hashed_percentage_for_object_ids_mock = mocker.patch(
-        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids",
+        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_id_pair",
     )
     get_hashed_percentage_for_object_ids_mock.return_value = percentage_value

@@ -870,10 +870,8 @@ def test_get_flag_result_from_context__calls_returns_expected(

     # the function is called with the expected key
     get_hashed_percentage_for_object_ids_mock.assert_called_once_with(
-        [
-            expected_feature_context_key,
-            expected_key,
-        ]
+        expected_feature_context_key,
+        expected_key,
     )


@@ -885,7 +883,7 @@ def test_get_flag_result_from_feature_context__null_key__calls_returns_expected(
     expected_feature_context_key = "2"

     get_hashed_percentage_for_object_ids_mock = mocker.patch(
-        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids",
"flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids", + "flag_engine.segments.evaluator.get_hashed_percentage_for_object_id_pair", ) feature_context: FeatureContext = { From 4eaace5a4a75043b031829169ac88b2e16847cc8 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 24 Apr 2026 14:42:51 +0100 Subject: [PATCH 2/2] test: Point benchmark + coverage at shared large-env fixture - Bump the engine-test-data submodule to pick up Flagsmith/engine-test-data#51 (a realistic 262-feature / 26-multivariate / segment-override test case mirroring the #198 scenario) and consume it as the large-context benchmark, dropping the synthetic local fixture. - Exercise the `metadata` branch of the `get_flag_result_from_context` wrapper by adding metadata to the existing parametrized test. - Add a test for the `value == 100` recursive fallback on the new two-key fast-path hasher. Restores 100% coverage and keeps the realistic benchmark portable across every SDK consuming engine-test-data, rather than living only in this repo's Python tests. beep boop --- tests/engine_tests/engine-test-data | 2 +- tests/engine_tests/test_engine.py | 64 +++---------------- .../unit/segments/test_segments_evaluator.py | 4 ++ tests/unit/utils/test_utils_hashing.py | 33 +++++++++- 4 files changed, 47 insertions(+), 56 deletions(-) diff --git a/tests/engine_tests/engine-test-data b/tests/engine_tests/engine-test-data index 9307930e..c2b2f034 160000 --- a/tests/engine_tests/engine-test-data +++ b/tests/engine_tests/engine-test-data @@ -1 +1 @@ -Subproject commit 9307930e9e64482a35e7d6b254225addb6e44687 +Subproject commit c2b2f0347a52b4069a429c663ad3bbc53fed3eb6 diff --git a/tests/engine_tests/test_engine.py b/tests/engine_tests/test_engine.py index c86354de..8ac8c166 100644 --- a/tests/engine_tests/test_engine.py +++ b/tests/engine_tests/test_engine.py @@ -6,11 +6,14 @@ import pytest from _pytest.mark import ParameterSet -from flag_engine.context.types import EvaluationContext, FeatureContext +from flag_engine.context.types import EvaluationContext from flag_engine.engine import get_evaluation_result from flag_engine.result.types import EvaluationResult TEST_CASES_PATH = Path(__file__).parent / "engine-test-data/test_cases" +LARGE_ENVIRONMENT_TEST_CASE = ( + "test_000000cf-0000-0000-0000-000000000000__large_environment.json" +) EnvironmentDocument = dict[str, typing.Any] @@ -43,58 +46,11 @@ def _extract_benchmark_contexts( yield pyjson5.loads((test_cases_dir_path / file_path).read_text())["context"] -def _build_large_benchmark_context( - n_features: int = 262, - multivariate_features: int = 26, -) -> EvaluationContext: - """Mirror the scenario from flagsmith-python-client issue #198: a real-world - local-evaluation environment with ~260 features, a handful of which use - multivariate splits, evaluated for a single identity. Small enough to - keep the benchmark fast but large enough to surface per-feature overhead. - """ - features: dict[str, FeatureContext[typing.Any]] = {} - for i in range(n_features): - name = f"feature_{i:04d}" - fc: FeatureContext[typing.Any] = { - "key": str(i + 1), - "name": name, - "enabled": bool(i % 2), - "value": f"value-{i}", - "metadata": {"id": i + 1}, - } - if i < multivariate_features: - # Intentionally reverse-ordered so ``sorted()`` has work to do. 
- fc["variants"] = [ - {"value": f"mv-{i}-b", "weight": 40.0, "priority": 2}, - {"value": f"mv-{i}-a", "weight": 60.0, "priority": 1}, - ] - features[name] = fc - return { - "environment": {"key": "bench-env", "name": "bench"}, - "features": features, - "segments": { - "1": { - "key": "1", - "name": "bench-segment", - "rules": [ - { - "type": "ALL", - "conditions": [ - { - "property": "venue_id", - "operator": "EQUAL", - "value": "no-match", - } - ], - } - ], - } - }, - "identity": { - "identifier": "anonymous", - "traits": {"venue_id": "12345"}, - }, - } +def _load_test_case_context(name: str) -> EvaluationContext: + ctx: EvaluationContext = pyjson5.loads((TEST_CASES_PATH / name).read_text())[ + "context" + ] + return ctx TEST_CASES = sorted( @@ -102,7 +58,7 @@ def _build_large_benchmark_context( key=lambda param: str(param.id), ) BENCHMARK_CONTEXTS = list(_extract_benchmark_contexts(TEST_CASES_PATH)) -LARGE_BENCHMARK_CONTEXT = _build_large_benchmark_context() +LARGE_BENCHMARK_CONTEXT = _load_test_case_context(LARGE_ENVIRONMENT_TEST_CASE) @pytest.mark.parametrize( diff --git a/tests/unit/segments/test_segments_evaluator.py b/tests/unit/segments/test_segments_evaluator.py index 12a78e6f..906f6cd2 100644 --- a/tests/unit/segments/test_segments_evaluator.py +++ b/tests/unit/segments/test_segments_evaluator.py @@ -806,6 +806,7 @@ def test_segment_condition_matches_context_value_for_modulo( "name": "my_feature", "reason": "SPLIT; weight=30", "value": "foo", + "metadata": {"id": 7}, }, ), ( @@ -815,6 +816,7 @@ def test_segment_condition_matches_context_value_for_modulo( "name": "my_feature", "reason": "SPLIT; weight=30", "value": "bar", + "metadata": {"id": 7}, }, ), ( @@ -824,6 +826,7 @@ def test_segment_condition_matches_context_value_for_modulo( "name": "my_feature", "reason": "DEFAULT", "value": "control", + "metadata": {"id": 7}, }, ), ), @@ -851,6 +854,7 @@ def test_get_flag_result_from_context__calls_returns_expected( "enabled": False, "name": "my_feature", "value": "control", + "metadata": {"id": 7}, "variants": [ {"value": "foo", "weight": 30, "priority": 1}, {"value": "bar", "weight": 30, "priority": 2}, diff --git a/tests/unit/utils/test_utils_hashing.py b/tests/unit/utils/test_utils_hashing.py index 2b8621f9..a96ac63c 100644 --- a/tests/unit/utils/test_utils_hashing.py +++ b/tests/unit/utils/test_utils_hashing.py @@ -5,7 +5,10 @@ import pytest -from flag_engine.utils.hashing import get_hashed_percentage_for_object_ids +from flag_engine.utils.hashing import ( + get_hashed_percentage_for_object_id_pair, + get_hashed_percentage_for_object_ids, +) @pytest.mark.parametrize( @@ -146,3 +149,31 @@ def hexdigest_side_effect() -> str: # the second call, with a string (in bytes) that contains each object id twice expected_bytes_2 = ",".join(str(id_) for id_ in object_ids * 2).encode("utf-8") assert call_list[1][0][0] == expected_bytes_2 + + +@mock.patch("flag_engine.utils.hashing.hashlib") +def test_get_hashed_percentage_for_object_id_pair__value_is_100__falls_back( + mock_hashlib: mock.Mock, +) -> None: + """When the two-key fast path would return exactly 100, it must fall back + to the generic helper with iterations=2 (same anti-boundary guarantee as + ``get_hashed_percentage_for_object_ids``).""" + + # 270e converts to 9998, forcing value == 100. 270f → 9999 → value == 0. 
+ hashed_values = ["270f", "270e"] + + def hexdigest_side_effect() -> str: + return hashed_values.pop() + + mock_hash = mock.MagicMock() + mock_hashlib.md5.return_value = mock_hash + mock_hash.hexdigest.side_effect = hexdigest_side_effect + + value = get_hashed_percentage_for_object_id_pair("12", "93") + + assert value == 0 + # First call: fast-path two-key hash (single pair); second: recursive fallback. + call_list = mock_hashlib.md5.call_args_list + assert len(call_list) == 2 + assert call_list[0][0][0] == b"12,93" + assert call_list[1][0][0] == b"12,93,12,93"