From 7d3e6f656654b4e81c4a48379b457f8ecd3af284 Mon Sep 17 00:00:00 2001 From: Quentin Kaiser Date: Mon, 20 Oct 2025 11:34:43 +0200 Subject: [PATCH 1/3] fix: handle identity field as dict or array in ComponentEvidence deserialization The identity field in ComponentEvidence can be either a single object (dict) in CycloneDX 1.5 or a single object/array of objects in CycloneDX 1.6. The previous implementation failed when deserializing a single object format, throwing 'str' object has no attribute 'items' error. Added _IdentitySetSerializationHelper to properly handle both formats: - json_normalize: serializes Identity objects as list while preserving view context - json_deserialize: handles both dict (single) and list (array) formats Also updated _ComponentEvidenceSerializationHelper.json_denormalize to normalize single dict format to array before deserialization for consistency. Fixes deserialization of CycloneDX 1.5 and 1.6 SBOMs with component evidence. Signed-off-by: Quentin Kaiser --- cyclonedx/model/component_evidence.py | 23 +++++ tests/test_model_component_evidence.py | 137 +++++++++++++++++++++++++ 2 files changed, 160 insertions(+) diff --git a/cyclonedx/model/component_evidence.py b/cyclonedx/model/component_evidence.py index c23acac9d..6d12f0a41 100644 --- a/cyclonedx/model/component_evidence.py +++ b/cyclonedx/model/component_evidence.py @@ -185,6 +185,24 @@ def xml_denormalize(cls, o: 'XmlElement', *, return [BomRef(value=t.get('ref')) for t in o] +class _IdentitySetSerializationHelper(serializable.helpers.BaseHelper): + """ THIS CLASS IS NON-PUBLIC API """ + + @classmethod + def json_normalize(cls, o: Iterable['Identity'], *, + view: Optional[type[serializable.ViewType]], + **__: Any) -> list[dict[str, Any]]: + # Serialize identity as a list of dicts, preserving the view context + return [json_loads(item.as_json(view)) for item in o] # type: ignore[attr-defined] + + @classmethod + def json_deserialize(cls, o: Union[dict[str, Any], list[dict[str, Any]]]) 
-> list['Identity']: + # Handle identity field which can be a dict (CycloneDX 1.5) or list of dicts (CycloneDX 1.6) + if isinstance(o, dict): + return [Identity.from_json(o)] # type: ignore[attr-defined] + return [Identity.from_json(item) for item in o] # type: ignore[attr-defined] + + @serializable.serializable_class(ignore_unknown_during_deserialization=True) class Identity: """ @@ -654,6 +672,7 @@ def __init__( @property @serializable.view(SchemaVersion1Dot5) @serializable.view(SchemaVersion1Dot6) + @serializable.type_mapping(_IdentitySetSerializationHelper) @serializable.xml_sequence(1) @serializable.xml_array(serializable.XmlArraySerializationType.FLAT, 'identity') def identity(self) -> 'SortedSet[Identity]': @@ -768,6 +787,10 @@ def json_normalize(cls, o: ComponentEvidence, *, @classmethod def json_denormalize(cls, o: dict[str, Any], **__: Any) -> Any: + # Handle identity field which can be a dict (CycloneDX 1.5) or list of dicts (CycloneDX 1.6) + # Before passing to ComponentEvidence.from_json, ensure it's always a list + if 'identity' in o and isinstance(o['identity'], dict): + o = {**o, 'identity': [o['identity']]} return ComponentEvidence.from_json(o) # type:ignore[attr-defined] @classmethod diff --git a/tests/test_model_component_evidence.py b/tests/test_model_component_evidence.py index f4561cbb8..0fd172ff4 100644 --- a/tests/test_model_component_evidence.py +++ b/tests/test_model_component_evidence.py @@ -201,6 +201,143 @@ def test_not_same_1(self) -> None: self.assertNotEqual(hash(ce_1), hash(ce_2)) self.assertFalse(ce_1 == ce_2) + def test_identity_deserialization_single_dict_format(self) -> None: + """Test deserialization of identity field as a single dict (CycloneDX 1.5 format)""" + # This is the format that was failing before the fix + json_data = { + 'identity': { + 'field': 'name', + 'confidence': 1.0, + 'concludedValue': 'test-component' + } + } + ce = ComponentEvidence.from_json(json_data) # type: ignore[attr-defined] + 
self.assertEqual(len(ce.identity), 1) + identity = list(ce.identity)[0] + self.assertEqual(identity.field, IdentityField.NAME) + self.assertEqual(identity.confidence, Decimal('1.0')) + self.assertEqual(identity.concluded_value, 'test-component') + + def test_identity_deserialization_array_format(self) -> None: + """Test deserialization of identity field as an array (CycloneDX 1.6 format)""" + json_data = { + 'identity': [ + { + 'field': 'name', + 'confidence': 1.0, + 'concludedValue': 'test-component' + }, + { + 'field': 'version', + 'confidence': 0.8, + 'concludedValue': '1.0.0' + } + ] + } + ce = ComponentEvidence.from_json(json_data) # type: ignore[attr-defined] + self.assertEqual(len(ce.identity), 2) + + # Check that both identities are present + identities = sorted(ce.identity, key=lambda x: x.field.value) + self.assertEqual(identities[0].field, IdentityField.NAME) + self.assertEqual(identities[0].concluded_value, 'test-component') + self.assertEqual(identities[1].field, IdentityField.VERSION) + self.assertEqual(identities[1].concluded_value, '1.0.0') + + def test_identity_dict_format_converts_to_array_internally(self) -> None: + """Test that single dict identity format is converted to array format internally""" + # When deserializing a single dict, it should be normalized to array format + # before being passed to ComponentEvidence + json_data_dict = { + 'identity': { + 'field': 'name', + 'confidence': 1.0, + 'concludedValue': 'test-component' + } + } + + json_data_array = { + 'identity': [ + { + 'field': 'name', + 'confidence': 1.0, + 'concludedValue': 'test-component' + } + ] + } + + # Both formats should produce the same result + ce_from_dict = ComponentEvidence.from_json(json_data_dict) # type: ignore[attr-defined] + ce_from_array = ComponentEvidence.from_json(json_data_array) # type: ignore[attr-defined] + + self.assertEqual(len(ce_from_dict.identity), 1) + self.assertEqual(len(ce_from_array.identity), 1) + + # The identity objects should be equivalent + 
identity_dict = list(ce_from_dict.identity)[0] + identity_array = list(ce_from_array.identity)[0] + self.assertEqual(identity_dict.field, identity_array.field) + self.assertEqual(identity_dict.confidence, identity_array.confidence) + self.assertEqual(identity_dict.concluded_value, identity_array.concluded_value) + + def test_identity_dict_with_multiple_methods(self) -> None: + """Test deserialization of single identity dict with multiple methods""" + json_data = { + 'identity': { + 'field': 'purl', + 'confidence': 0.95, + 'concludedValue': 'pkg:npm/example@1.0.0', + 'methods': [ + { + 'technique': 'source-code-analysis', + 'confidence': 0.9, + 'value': 'Found in package.json' + }, + { + 'technique': 'binary-analysis', + 'confidence': 0.85, + 'value': 'Found in binary metadata' + } + ] + } + } + ce = ComponentEvidence.from_json(json_data) # type: ignore[attr-defined] + self.assertEqual(len(ce.identity), 1) + identity = list(ce.identity)[0] + self.assertEqual(identity.field, IdentityField.PURL) + self.assertEqual(len(identity.methods), 2) + + # Verify methods are properly deserialized + methods = sorted(identity.methods, key=lambda m: m.technique.value) + self.assertEqual(methods[0].technique, AnalysisTechnique.BINARY_ANALYSIS) + self.assertEqual(methods[0].confidence, Decimal('0.85')) + self.assertEqual(methods[1].technique, AnalysisTechnique.SOURCE_CODE_ANALYSIS) + self.assertEqual(methods[1].confidence, Decimal('0.9')) + + def test_identity_deserialization_dict_with_methods(self) -> None: + """Test deserialization of single identity dict with methods""" + json_data = { + 'identity': { + 'field': 'name', + 'confidence': 0.95, + 'concludedValue': 'test-lib', + 'methods': [ + { + 'technique': 'source-code-analysis', + 'confidence': 0.9, + 'value': 'Found in metadata' + } + ] + } + } + ce = ComponentEvidence.from_json(json_data) # type: ignore[attr-defined] + self.assertEqual(len(ce.identity), 1) + identity = list(ce.identity)[0] + 
self.assertEqual(len(identity.methods), 1) + method = list(identity.methods)[0] + self.assertEqual(method.technique, AnalysisTechnique.SOURCE_CODE_ANALYSIS) + self.assertEqual(method.confidence, Decimal('0.9')) + class TestModelCallStackFrame(TestCase): From 054334bfd5f652d726a02606c01138e87d6db19e Mon Sep 17 00:00:00 2001 From: Jan Kowalleck Date: Tue, 21 Oct 2025 11:48:22 +0200 Subject: [PATCH 2/3] wip Signed-off-by: Jan Kowalleck --- cyclonedx/model/component_evidence.py | 8 +++- .../json/1.6/component_evidence_identity.json | 45 +++++++++++++++++++ tests/test_deserialize_json.py | 10 +++++ tests/test_model_component_evidence.py | 5 +-- 4 files changed, 63 insertions(+), 5 deletions(-) create mode 100644 tests/_data/own/json/1.6/component_evidence_identity.json diff --git a/cyclonedx/model/component_evidence.py b/cyclonedx/model/component_evidence.py index c23acac9d..8e73243db 100644 --- a/cyclonedx/model/component_evidence.py +++ b/cyclonedx/model/component_evidence.py @@ -288,8 +288,8 @@ def __hash__(self) -> int: def __repr__(self) -> str: return f'<Identity field={self.field}, confidence={self.confidence},' \ - f' concludedValue={self.concluded_value},' \ - f' methods={self.methods}, tools={self.tools}>' + f' concludedValue={self.concluded_value},' \ + f' methods={self.methods}, tools={self.tools}>' @serializable.serializable_class(ignore_unknown_during_deserialization=True) @@ -768,6 +768,10 @@ def json_normalize(cls, o: ComponentEvidence, *, @classmethod def json_denormalize(cls, o: dict[str, Any], **__: Any) -> Any: + if isinstance(identity := o.get('identity', []), dict): + # Handle identity field which can be a dict (CycloneDX 1.5) or list of dicts (CycloneDX 1.6) + # Before passing to ComponentEvidence.from_json, ensure it's always a list + o = {**o, 'identity': [identity]} return ComponentEvidence.from_json(o) # type:ignore[attr-defined] @classmethod diff --git a/tests/_data/own/json/1.6/component_evidence_identity.json b/tests/_data/own/json/1.6/component_evidence_identity.json new file mode 100644 index 000000000..0ef019e50 --- /dev/null +++ b/tests/_data/own/json/1.6/component_evidence_identity.json @@ 
-0,0 +1,45 @@ +{ + "$schema": "http://cyclonedx.org/schema/bom-1.6.schema.json", + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "serialNumber": "urn:uuid:66fa5692-2e9d-45c5-830a-ec8ccaf7dcc9", + "version": 1, + "metadata": { + "component": { + "description": "test component evidence identity", + "type": "application", + "name": "test" + } + }, + "components": [ + { + "type": "operating-system", + "bom-ref": "alpine12", + "name": "alpine", + "version": "12", + "evidence": { + "identity": [ + { + "field": "name", + "confidence": 1.0 + }, + { + "field": "version", + "confidence": 0.98 + } + ] + } + }, + { + "type": "library", + "bom-ref": "libssl", + "name": "libssl", + "evidence": { + "identity": { + "field": "name", + "confidence": 1.0 + } + } + } + ] +} diff --git a/tests/test_deserialize_json.py b/tests/test_deserialize_json.py index 21ee621f5..695493780 100644 --- a/tests/test_deserialize_json.py +++ b/tests/test_deserialize_json.py @@ -127,3 +127,13 @@ def test_regression_issue690(self) -> None: json = json_loads(f.read()) bom: Bom = Bom.from_json(json) # <<< is expected to not crash self.assertIsNotNone(bom) + + def test_component_evidence_identity(self) -> None: + """Since 1.6 it is allowed to have component evidence identity as a list or an object""" + json_file = join(OWN_DATA_DIRECTORY, 'json', + SchemaVersion.V1_6.to_version(), + 'component_evidence_identity.json') + with open(json_file) as f: + json = json_loads(f.read()) + bom: Bom = Bom.from_json(json) # <<< is expected to not crash + self.assertIsNotNone(bom) diff --git a/tests/test_model_component_evidence.py b/tests/test_model_component_evidence.py index f4561cbb8..c0d42d34b 100644 --- a/tests/test_model_component_evidence.py +++ b/tests/test_model_component_evidence.py @@ -37,9 +37,9 @@ class TestModelComponentEvidence(TestCase): def test_no_params(self) -> None: ComponentEvidence() # Does not raise `NoPropertiesProvidedException` - def test_identity(self) -> None: + def 
test_identity_single(self) -> None: identity = Identity(field=IdentityField.NAME, confidence=Decimal('1'), concluded_value='test') - ce = ComponentEvidence(identity=[identity]) + ce = ComponentEvidence(identity=identity) self.assertEqual(len(ce.identity), 1) self.assertEqual(ce.identity.pop().field, 'name') @@ -201,7 +201,6 @@ def test_not_same_1(self) -> None: self.assertNotEqual(hash(ce_1), hash(ce_2)) self.assertFalse(ce_1 == ce_2) - class TestModelCallStackFrame(TestCase): def test_fields(self) -> None: From d7116f167d73c485b7bc77fe7cab5a5e240bece7 Mon Sep 17 00:00:00 2001 From: Jan Kowalleck Date: Tue, 21 Oct 2025 15:07:41 +0200 Subject: [PATCH 3/3] style Signed-off-by: Jan Kowalleck --- tests/test_model_component_evidence.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_model_component_evidence.py b/tests/test_model_component_evidence.py index c0d42d34b..2041d28ef 100644 --- a/tests/test_model_component_evidence.py +++ b/tests/test_model_component_evidence.py @@ -201,6 +201,7 @@ def test_not_same_1(self) -> None: self.assertNotEqual(hash(ce_1), hash(ce_2)) self.assertFalse(ce_1 == ce_2) + class TestModelCallStackFrame(TestCase): def test_fields(self) -> None: