diff --git a/CHANGELOG.md b/CHANGELOG.md index a062e8047..d860be2b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - Python server: Support optional metrics ([#828](https://github.com/mozilla/glean_parser/pull/828)) - BUGFIX: Correct event timestamp values in server language templates ([#831](https://github.com/mozilla/glean_parser/pull/831)) - Remove the `coverage` subcommand. The SDK also removed metric testing covergae ([#832](https://github.com/mozilla/glean_parser/pull/832)) +- Add Go support for parsing the Object metric type. The oneOf parameter type is currently unsupported. ## 18.2.0 diff --git a/glean_parser/go_server.py b/glean_parser/go_server.py index af4b5e509..21b1c0197 100644 --- a/glean_parser/go_server.py +++ b/glean_parser/go_server.py @@ -39,6 +39,7 @@ "datetime", "boolean", "string_list", + "object", ] @@ -66,6 +67,15 @@ def generate_metric_argument_name(metric: metrics.Metric) -> str: return f"{util.Camelize(metric.category)}{util.Camelize(metric.name)}" +def generate_object_type_name(metric: metrics.Metric) -> str: + """Generate the Go type name for an object metric.""" + return f"{util.Camelize(metric.category)}{util.Camelize(metric.name)}Object" + + +def clean_string(s: str) -> str: + return s.replace("\n", " ").rstrip() + + def generate_metric_type(metric_type: str) -> str: if metric_type == "quantity": return "int64" @@ -77,14 +87,133 @@ def generate_metric_type(metric_type: str) -> str: return "time.Time" elif metric_type == "string_list": return "[]string" + # 'oneOf' is not currently supported in object structures + elif metric_type == "object": + return "object" else: print("❌ Unable to generate Go type from metric type: " + metric_type) exit return "NONE" -def clean_string(s: str) -> str: - return s.replace("\n", " ").rstrip() +def generate_parameter_type(schema: Dict[str, Any], indent: int = 0) -> str: + """ + Convert a JSON schema type definition to a Go type string. + + :param schema: JSON schema definition (e.g., from metric.structure) + :param indent: Current indentation level for nested structs + :return: Go type string + """ + parameter_type = schema.get("type") + + if parameter_type == "string": + return "string" + elif parameter_type == "number": + return "float64" + elif parameter_type == "boolean": + return "bool" + elif parameter_type == "array": + return generate_array_struct_definition(schema, indent) + elif parameter_type == "object": + properties = schema.get("properties", {}) + if not properties: + print( + "❌ Unable to generate Go type. Object type must have 'properties' field with at least one property" + ) + exit + return "NONE" + + indent_str = "\t" * (indent + 1) + fields = [] + for prop_name, prop_schema in properties.items(): + field_type = generate_parameter_type(prop_schema, indent + 1) + field_name = util.Camelize(prop_name) + json_tag = f'`json:"{prop_name}"`' + fields.append(f"{indent_str}{field_name} {field_type} {json_tag}") + + fields_str = "\n".join(fields) + close_indent = "\t" * indent + return f"struct {{\n{fields_str}\n{close_indent}}}" + else: + print( + f"❌ Unable to generate Go type. Unknown parameter type '{parameter_type}'. Supported types: string, number, boolean, array, object" + ) + exit + return "NONE" + + +def generate_object_struct_definition(metric: metrics.Metric) -> str: + """ + Generate a complete Go struct definition for an object metric. + + :param metric: The object metric + :return: Go struct definition as a string + """ + type_name = generate_object_type_name(metric) + + if not hasattr(metric, "structure") or not metric.structure: + print( + f"❌ Unable to generate Go type. Object metric '{metric.category}.{metric.name}' is missing required 'structure' field" + ) + exit + return "NONE" + + parameter_type = metric.structure.get("type") + indent = 0 + + if parameter_type == "array": + array_type = generate_array_struct_definition(metric.structure, indent) + return f"type {type_name} {array_type}" + elif parameter_type == "object": + properties = metric.structure.get("properties", {}) + if not properties: + print( + f"❌ Unable to generate Go type. Object metric '{metric.category}.{metric.name}' has object type but no 'properties' defined" + ) + exit + return "NONE" + + indent += 1 + fields = [] + for prop_name, prop_schema in properties.items(): + field_type = generate_parameter_type(prop_schema, indent) + field_name = util.Camelize(prop_name) + json_tag = f'`json:"{prop_name}"`' + fields.append(f"\t{field_name} {field_type} {json_tag}") + + fields_str = "\n".join(fields) + return f"type {type_name} struct {{\n{fields_str}\n}}" + else: + print( + f"❌ Unable to generate Go type. Object metric '{metric.category}.{metric.name}' has unexpected type '{parameter_type}'. Expected 'array' or 'object'" + ) + exit + return "NONE" + + +def generate_array_struct_definition(schema: Dict[str, Any], indent: int) -> str: + """ + Generate Go type for an array schema. + + :param schema: Array schema with 'items' field + :param indent: Current indentation level for nested structs + :return: Go array type string + """ + items_schema = schema.get("items", {}) + + if "oneOf" in items_schema and "type" not in items_schema: + print("❌ oneOf is currently not supported in Go struct generation") + exit + return "NONE" + elif "type" not in items_schema: + print( + "❌ Unable to generate Go type. Array items schema must have 'type' field" + ) + exit + return "NONE" + + item_type = generate_parameter_type(items_schema, indent) + return f"[]{item_type}" def output_go( @@ -112,12 +241,17 @@ def output_go( ("metric_argument_name", generate_metric_argument_name), ("go_metric_type", generate_metric_type), ("clean_string", clean_string), + ("object_type_name", generate_object_type_name), + ("object_struct_definition", generate_object_struct_definition), ), ) # unique list of event metrics used in any ping event_metrics: List[metrics.Metric] = [] + # unique list of object metrics used in any ping + object_metrics: List[metrics.Metric] = [] + # Go through all metrics in objs and build a map of # ping->list of metric categories->list of metrics # for easier processing in the template. @@ -138,6 +272,9 @@ def output_go( if metric.type == "event" and metric not in event_metrics: event_metrics.append(metric) + if metric.type == "object" and metric not in object_metrics: + object_metrics.append(metric) + metrics_by_type = ping_to_metrics[ping] metrics_list = metrics_by_type.setdefault(metric.type, []) metrics_list.append(metric) @@ -156,6 +293,9 @@ def output_go( with filepath.open("w", encoding="utf-8") as fd: fd.write( template.render( - parser_version=__version__, pings=ping_to_metrics, events=event_metrics + parser_version=__version__, + pings=ping_to_metrics, + events=event_metrics, + objects=object_metrics, ) ) diff --git a/glean_parser/metrics.py b/glean_parser/metrics.py index 9ba944fe3..274908f09 100644 --- a/glean_parser/metrics.py +++ b/glean_parser/metrics.py @@ -462,6 +462,11 @@ def __init__(self, *args, **kwargs): self._generate_structure = self.validate_structure(structure) super().__init__(*args, **kwargs) + @property + def structure(self): + """Return the validated structure for this object metric.""" + return self._generate_structure + ALLOWED_TOPLEVEL = {"type", "properties", "items", "description", "oneOf"} ALLOWED_TYPES = ["object", "array", "number", "string", "boolean"] ALLOWED_SUBTYPES = ["number", "string", "boolean"] diff --git a/glean_parser/templates/go_server.jinja2 b/glean_parser/templates/go_server.jinja2 index bdcb2625c..4f80e1731 100644 --- a/glean_parser/templates/go_server.jinja2 +++ b/glean_parser/templates/go_server.jinja2 @@ -220,6 +220,13 @@ func (e {{ event|event_type_name }}) gleanEvent() gleanEvent { } {% endfor %} {% endif %} +{# if any ping has an object metric, create type definitions for them #} +{% if objects %} +{% for object in objects %} + +{{ object|object_struct_definition }} +{% endfor %} +{% endif %} {# struct & methods for submitting pings #} {% for ping, metrics_by_type in pings.items() %} {% if metrics_by_type['event'] %} @@ -240,7 +247,11 @@ type {{ ping|ping_type_name }} struct { {% for metric_type, metrics in metrics_by_type.items() %} {% if metric_type != 'event' %} {% for metric in metrics %} + {% if metric_type == 'object' %} + {{ metric|metric_argument_name }} {{ metric|object_type_name }} // {{ metric.description|clean_string }} + {% else %} {{ metric|metric_argument_name }} {{ metric_type|go_metric_type}} // {{ metric.description|clean_string }} + {% endif %} {% endfor %} {% endif %} {% endfor %} diff --git a/tests/data/go_server_objects_metrics.yaml b/tests/data/go_server_objects_metrics.yaml new file mode 100644 index 000000000..b4daa738e --- /dev/null +++ b/tests/data/go_server_objects_metrics.yaml @@ -0,0 +1,116 @@ +# Any copyright is dedicated to the Public Domain. +# https://creativecommons.org/publicdomain/zero/1.0/ + +--- +$schema: moz://mozilla.org/schemas/glean/metrics/2-0-0 + +metric: + name: + type: string + description: | + Test string metric + lifetime: application + send_in_pings: + - server-telemetry-objects + notification_emails: + - CHANGE-ME@example.com + bugs: + - TBD + data_reviews: + - TBD + expires: never + +test: + simple_object: + type: object + description: Simple object with basic types + send_in_pings: + - server-telemetry-objects + notification_emails: + - CHANGE-ME@example.com + bugs: + - TBD + data_reviews: + - TBD + expires: never + structure: + type: object + properties: + name: + type: string + count: + type: number + enabled: + type: boolean + + number_array: + type: object + description: Array of numbers + send_in_pings: + - server-telemetry-objects + notification_emails: + - CHANGE-ME@example.com + bugs: + - TBD + data_reviews: + - TBD + expires: never + structure: + type: array + items: + type: number + + nested_object: + type: object + description: Object with nested structures + send_in_pings: + - server-telemetry-objects + notification_emails: + - CHANGE-ME@example.com + bugs: + - TBD + data_reviews: + - TBD + expires: never + structure: + type: object + properties: + user_id: + type: string + metadata: + type: object + properties: + version: + type: number + active: + type: boolean + tags: + type: array + items: + type: string + + complex_array: + type: object + description: Array of objects + send_in_pings: + - server-telemetry-objects + notification_emails: + - CHANGE-ME@example.com + bugs: + - TBD + data_reviews: + - TBD + expires: never + structure: + type: array + items: + type: object + properties: + id: + type: number + name: + type: string + data: + type: array + items: + type: number diff --git a/tests/data/go_server_objects_pings.yaml b/tests/data/go_server_objects_pings.yaml new file mode 100644 index 000000000..ecdc8d27d --- /dev/null +++ b/tests/data/go_server_objects_pings.yaml @@ -0,0 +1,17 @@ +# Any copyright is dedicated to the Public Domain. +# https://creativecommons.org/publicdomain/zero/1.0/ + +--- +$schema: moz://mozilla.org/schemas/glean/pings/2-0-0 + +server-telemetry-objects: + description: | + Backend ping for testing object metrics + include_client_id: false + send_if_empty: false + bugs: + - TBD + data_reviews: + - TBD + notification_emails: + - CHANGE-ME@example.com diff --git a/tests/data/server_objects_compare.go b/tests/data/server_objects_compare.go new file mode 100644 index 000000000..f2e2f3424 --- /dev/null +++ b/tests/data/server_objects_compare.go @@ -0,0 +1,239 @@ +package glean + +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// AUTOGENERATED BY {current_version}. DO NOT EDIT. + +// required imports +import ( + "encoding/json" + "errors" + "fmt" + "io" + "strconv" + "time" + + "github.com/google/uuid" +) + +// log type string used to identify logs to process in the Moz Data Pipeline +var gleanEventMozlogType string = "glean-server-event" + +// A GleanEventsLogger produces output in the required format for Glean to ingest. +// Glean ingestion requires the output to be written to os.Stdout. Writing to a different +// output will require the consumer to handle any closing as appropriate for the Writer. +// e.g. if writing to a file. +type GleanEventsLogger struct { + AppID string // Application Id to identify application per Glean standards + AppDisplayVersion string // Version of application emitting the event + AppChannel string // Channel to differentiate logs from prod/beta/staging/devel + Writer io.Writer // Writer to output to. Normal operation expects os.Stdout +} + +// exported type for public method parameters +type RequestInfo struct { + UserAgent string + IpAddress string +} + +// default empty values will be omitted in json from ping struct definition +var defaultRequestInfo = RequestInfo{ + UserAgent: "", + IpAddress: "", +} + +// structs to construct the glean ping +type clientInfo struct { + TelemetrySDKBuild string `json:"telemetry_sdk_build"` + FirstRunDate string `json:"first_run_date"` + OS string `json:"os"` + OSVersion string `json:"os_version"` + Architecture string `json:"architecture"` + AppBuild string `json:"app_build"` + AppDisplayVersion string `json:"app_display_version"` + AppChannel string `json:"app_channel"` +} + +type pingInfo struct { + Seq int `json:"seq"` + StartTime string `json:"start_time"` + EndTime string `json:"end_time"` +} + +type ping struct { + DocumentNamespace string `json:"document_namespace"` + DocumentType string `json:"document_type"` + DocumentVersion string `json:"document_version"` + DocumentID string `json:"document_id"` + UserAgent string `json:"user_agent,omitempty"` + IpAddress string `json:"ip_address,omitempty"` + Payload string `json:"payload"` +} + +type metrics map[string]map[string]interface{} + +type pingPayload struct { + ClientInfo clientInfo `json:"client_info"` + PingInfo pingInfo `json:"ping_info"` + Metrics metrics `json:"metrics"` + Events []gleanEvent `json:"events"` +} + +type gleanEvent struct { + Category string `json:"category"` + Name string `json:"name"` + Timestamp int64 `json:"timestamp"` + Extra map[string]string `json:"extra"` +} + +type logEnvelope struct { + Timestamp string + Logger string + Type string + Fields ping +} + +func (g GleanEventsLogger) createClientInfo() clientInfo { + // Fields with default values are required in the Glean schema, but not used in server context + return clientInfo{ + TelemetrySDKBuild: "{current_version}", + FirstRunDate: "Unknown", + OS: "Unknown", + OSVersion: "Unknown", + Architecture: "Unknown", + AppBuild: "Unknown", + AppDisplayVersion: g.AppDisplayVersion, + AppChannel: g.AppChannel, + } +} + +func createPingInfo() pingInfo { + now := time.Now().UTC().Format("2006-01-02T15:04:05.000Z") + return pingInfo{ + Seq: 0, + StartTime: now, + EndTime: now, + } +} + +func (g GleanEventsLogger) createPing(documentType string, config RequestInfo, payload pingPayload) (ping, error) { + payloadJson, err := json.Marshal(payload) + if err != nil { + return ping{}, err + } + + documentID, err := uuid.NewRandom() + if err != nil { + return ping{}, err + } + + return ping{ + DocumentNamespace: g.AppID, + DocumentType: documentType, + DocumentVersion: "1", + DocumentID: documentID.String(), + UserAgent: config.UserAgent, + IpAddress: config.IpAddress, + Payload: string(payloadJson), + }, nil +} + +// method called by each ping-specific record method. +// construct the ping, wrap it in the envelope, and print to stdout +func (g GleanEventsLogger) record( + documentType string, + requestInfo RequestInfo, + metrics metrics, + events []gleanEvent, +) error { + if g.Writer == nil { + return errors.New("writer not specified") + } + + telemetryPayload := pingPayload{ + ClientInfo: g.createClientInfo(), + PingInfo: createPingInfo(), + Metrics: metrics, + Events: events, + } + + ping, err := g.createPing(documentType, requestInfo, telemetryPayload) + if err != nil { + return err + } + + envelope := logEnvelope{ + Timestamp: strconv.FormatInt(time.Now().UnixNano(), 10), + Logger: "glean", + Type: gleanEventMozlogType, + Fields: ping, + } + envelopeJson, err := json.Marshal(envelope) + if err != nil { + return err + } + + fmt.Fprintln(g.Writer, string(envelopeJson)) + return nil +} + +type TestComplexArrayObject []struct { + Id float64 `json:"id"` + Name string `json:"name"` + Data []float64 `json:"data"` +} + +type TestNestedObjectObject struct { + UserId string `json:"user_id"` + Metadata struct { + Version float64 `json:"version"` + Active bool `json:"active"` + } `json:"metadata"` + Tags []string `json:"tags"` +} + +type TestNumberArrayObject []float64 + +type TestSimpleObjectObject struct { + Name string `json:"name"` + Count float64 `json:"count"` + Enabled bool `json:"enabled"` +} + +type ServerTelemetryObjectsPing struct { + MetricName string // Test string metric + TestComplexArray TestComplexArrayObject // Array of objects + TestNestedObject TestNestedObjectObject // Object with nested structures + TestNumberArray TestNumberArrayObject // Array of numbers + TestSimpleObject TestSimpleObjectObject // Simple object with basic types +} + +// Record and submit `server-telemetry-objects` ping +func (g GleanEventsLogger) RecordServerTelemetryObjectsPing( + requestInfo RequestInfo, + params ServerTelemetryObjectsPing, +) error { + metrics := metrics{ + "string": { + "metric.name": params.MetricName, + }, + "object": { + "test.complex_array": params.TestComplexArray, + "test.nested_object": params.TestNestedObject, + "test.number_array": params.TestNumberArray, + "test.simple_object": params.TestSimpleObject, + }, + } + + events := []gleanEvent{} + return g.record("server-telemetry-objects", requestInfo, metrics, events) +} + +// Record and submit `server-telemetry-objects` ping omitting user request info +func (g GleanEventsLogger) RecordServerTelemetryObjectsPingWithoutUserInfo( + params ServerTelemetryObjectsPing, +) error { + return g.RecordServerTelemetryObjectsPing(defaultRequestInfo, params) +} diff --git a/tests/test-go/test.go.tmpl b/tests/test-go/test.go.tmpl index 3ab9e6c3c..52b3e065c 100644 --- a/tests/test-go/test.go.tmpl +++ b/tests/test-go/test.go.tmpl @@ -3,7 +3,6 @@ package main import ( "glean/glean" "os" - "time" /* IMPORTS */ ) diff --git a/tests/test_go_server.py b/tests/test_go_server.py index 8790ff4ea..558a6a512 100644 --- a/tests/test_go_server.py +++ b/tests/test_go_server.py @@ -183,7 +183,7 @@ def test_run_logging_events_ping(tmp_path): ) """ - logged_output = run_logger(tmp_path, code) + logged_output = run_logger(tmp_path, code, imports='"time"') logged_output = json.loads(logged_output) fields = logged_output["Fields"] payload = fields["payload"] @@ -207,6 +207,145 @@ def test_run_logging_events_ping(tmp_path): ) +def test_parser_go_server_with_objects(tmp_path): + """Test that parser works with object metrics""" + translate.translate( + [ + ROOT / "data" / "go_server_objects_metrics.yaml", + ROOT / "data" / "go_server_objects_pings.yaml", + ], + "go_server", + tmp_path, + ) + + assert set(x.name for x in tmp_path.iterdir()) == set(["server_events.go"]) + + # Make sure generated file matches expected + with (tmp_path / "server_events.go").open("r", encoding="utf-8") as fd: + content = fd.read() + with (ROOT / "data" / "server_objects_compare.go").open( + "r", encoding="utf-8" + ) as cd: + compare_raw = cd.read() + + glean_version = f"glean_parser v{glean_parser.__version__}" + # use replace instead of format since Go uses { } + compare = compare_raw.replace("{current_version}", glean_version) + assert content == compare + + +@pytest.mark.go_dependency +def test_run_logging_objects_ping(tmp_path): + """Test that generated code with object metrics compiles and runs correctly""" + glean_module_path = tmp_path / "glean" + + translate.translate( + [ + ROOT / "data" / "go_server_objects_metrics.yaml", + ROOT / "data" / "go_server_objects_pings.yaml", + ], + "go_server", + glean_module_path, + ) + + code = """ + logger.RecordServerTelemetryObjectsPing( + glean.RequestInfo{ + UserAgent: "glean-test/1.0", + IpAddress: "127.0.0.1", + }, + glean.ServerTelemetryObjectsPing{ + MetricName: "test string", + TestSimpleObject: glean.TestSimpleObjectObject{ + Name: "simple test", + Count: 42.5, + Enabled: true, + }, + TestNumberArray: glean.TestNumberArrayObject{1.1, 2.2, 3.3}, + TestNestedObject: glean.TestNestedObjectObject{ + UserId: "user123", + Metadata: struct { + Version float64 `json:"version"` + Active bool `json:"active"` + }{ + Version: 1.0, + Active: true, + }, + Tags: []string{"tag1", "tag2"}, + }, + TestComplexArray: glean.TestComplexArrayObject{ + { + Id: 1, + Name: "item1", + Data: []float64{10.1, 20.2}, + }, + { + Id: 2, + Name: "item2", + Data: []float64{30.3, 40.4}, + }, + }, + }, + ) + """ + + logged_output = run_logger(tmp_path, code) + logged_output = json.loads(logged_output) + fields = logged_output["Fields"] + payload = fields["payload"] + + # Basic validation + assert "glean-server-event" == logged_output["Type"] + assert "glean.test" == fields["document_namespace"] + assert "server-telemetry-objects" == fields["document_type"] + assert "1" == fields["document_version"] + assert "glean-test/1.0" == fields["user_agent"] + + # Validate against Glean schema + schema_url = ( + "https://raw.githubusercontent.com/mozilla-services/" + "mozilla-pipeline-schemas/main/" + "schemas/glean/glean/glean.1.schema.json" + ) + + input = io.StringIO(payload) + output = io.StringIO() + assert validate_ping.validate_ping(input, output, schema_url=schema_url) == 0, ( + output.getvalue() + ) + + # Validate object structure in payload + payload_json = json.loads(payload) + metrics = payload_json["metrics"] + + # Check object metrics exist + assert "object" in metrics + assert "test.simple_object" in metrics["object"] + assert "test.number_array" in metrics["object"] + assert "test.nested_object" in metrics["object"] + assert "test.complex_array" in metrics["object"] + + # Validate simple object structure + simple_obj = metrics["object"]["test.simple_object"] + assert simple_obj["name"] == "simple test" + assert simple_obj["count"] == 42.5 + assert simple_obj["enabled"] + + # Validate number array + number_array = metrics["object"]["test.number_array"] + assert number_array[0] == 1.1 + + # Validate nested object + nested_obj = metrics["object"]["test.nested_object"] + assert nested_obj["user_id"] == "user123" + assert nested_obj["metadata"]["version"] == 1.0 + + # Validate complex array + complex_array = metrics["object"]["test.complex_array"] + assert complex_array[0]["name"] == "item1" + assert complex_array[1]["data"][0] == 30.3 + + @pytest.mark.go_dependency def test_run_logging_custom_ping_without_event(tmp_path): glean_module_path = tmp_path / "glean" @@ -236,7 +375,7 @@ def test_run_logging_custom_ping_without_event(tmp_path): ) """ - logged_output = run_logger(tmp_path, code) + logged_output = run_logger(tmp_path, code, imports='"time"') logged_output = json.loads(logged_output) fields = logged_output["Fields"] payload = fields["payload"] @@ -276,6 +415,7 @@ def test_run_logging_discard_writer(tmp_path): imports = """ "io" "fmt" + "time" """ code = """ @@ -318,6 +458,7 @@ def test_run_logging_nil_writer(tmp_path): imports = """ "fmt" + "time" """ code = """ @@ -378,7 +519,7 @@ def test_run_logging_custom_ping_with_event(tmp_path): ) """ - logged_output = run_logger(tmp_path, code) + logged_output = run_logger(tmp_path, code, imports='"time"') logged_output = json.loads(logged_output) fields = logged_output["Fields"] payload = fields["payload"]