From 3415911db0594b5387978aa7bcf8d81c7b8b8304 Mon Sep 17 00:00:00 2001 From: Dylan Date: Wed, 10 Jun 2026 08:52:56 -0400 Subject: [PATCH] fix(super-editor): emit OOXML property children in canonical schema order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OOXML property containers (w:rPr, w:pPr, w:numPr, w:trPr, w:tcPr, w:tblPr, w:tblPrEx, w:sectPr) are defined by ECMA-376 as xsd:sequence complex types (CT_RPr, CT_PPr, ...) whose child elements MUST appear in a fixed order. On export, SuperDoc emitted these children in JavaScript insertion order: decodeProperties() iterates Object.keys(attrs) (ProseMirror attr order) and generateRunProps() emits run marks in mark-array order. Neither matches the schema sequence, so children came out arbitrarily (e.g. numId before ilvl on every list item, color before rFonts, trPr hidden before trHeight, tblPr tblLook before tblCellMar). Desktop Word silently repairs out-of-order children on open and hid the bug, but Word for the web rejects the package and opens the document read-only as "corrupt" — reproduced repeatedly against real exports. Fix: add a shared canonical child-order table keyed by container local name (ooxml-property-order.js) and stable-sort the emitted children at the two emit sites — createNestedPropertiesTranslator.decode (covers rPr/pPr/numPr/trPr/ tcPr/tblPr) and exporter.js generateRunProps (the run-mark rPr path). Unknown or extension elements (w14:*, mc:AlternateContent) sort after known children with their relative order preserved. This generalizes the existing CT_TC_MAR_CHILD_ORDER pattern (tcMar/tblCellMar) into one reusable table. The fix is independent of translator array order, so the non-canonical ordering of the propertyTranslators arrays themselves no longer matters. Co-authored-by: Cursor --- .../v1/core/super-converter/exporter.js | 8 +- .../v3/handlers/ooxml-property-order.js | 237 ++++++++++++++++++ .../v3/handlers/ooxml-property-order.test.js | 122 +++++++++ .../core/super-converter/v3/handlers/utils.js | 6 +- 4 files changed, 371 insertions(+), 2 deletions(-) create mode 100644 packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/ooxml-property-order.js create mode 100644 packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/ooxml-property-order.test.js diff --git a/packages/super-editor/src/editors/v1/core/super-converter/exporter.js b/packages/super-editor/src/editors/v1/core/super-converter/exporter.js index 2fc64e5311..f97c37b596 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/exporter.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/exporter.js @@ -2,6 +2,7 @@ import { SuperConverter } from './SuperConverter.js'; import { inchesToTwips, linesToTwips, rgbToHex } from './helpers.js'; import { DEFAULT_DOCX_DEFS } from './exporter-docx-defs.js'; import { translateChildNodes } from './v2/exporter/helpers/index.js'; +import { sortPropertyChildElements } from './v3/handlers/ooxml-property-order.js'; import { translator as wBrNodeTranslator } from './v3/handlers/w/br/br-translator.js'; import { translator as wHighlightTranslator } from './v3/handlers/w/highlight/highlight-translator.js'; import { translator as wTabNodeTranslator } from './v3/handlers/w/tab/tab-translator.js'; @@ -508,7 +509,12 @@ export function wrapTextInRun(nodeOrNodes, marks) { export function generateRunProps(marks = []) { return { name: 'w:rPr', - elements: marks.filter((mark) => !!Object.keys(mark).length), + // w:rPr is an ECMA-376 xsd:sequence (CT_RPr); emit children in canonical + // schema order rather than mark-array order so Word for the web accepts it. + elements: sortPropertyChildElements( + 'w:rPr', + marks.filter((mark) => !!Object.keys(mark).length), + ), }; } diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/ooxml-property-order.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/ooxml-property-order.js new file mode 100644 index 0000000000..528c19a951 --- /dev/null +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/ooxml-property-order.js @@ -0,0 +1,237 @@ +// @ts-check +/** + * Canonical child-element order for OOXML fixed-sequence property containers. + * + * ECMA-376 models these property containers (`w:rPr`, `w:pPr`, `w:numPr`, ...) + * as `xsd:sequence` complex types (`CT_RPr`, `CT_PPr`, `CT_NumPr`, ...), which + * means their child elements MUST appear in a fixed order. Desktop Word + * silently repairs out-of-order children on open, but Word for the web rejects + * the whole package and opens the document read-only as "corrupt". + * + * The export pipeline emits children in JavaScript object/array insertion order + * (`Object.keys(attrs)`, mark-array order), which is not guaranteed to match the + * schema sequence. This table lets the emit paths stable-sort children back into + * canonical order regardless of how the ProseMirror attributes were built. + * + * Keys are container local names (namespace prefix stripped); values are the + * ECMA-376 child sequences as local names. Sequences are sourced from ECMA-376 + * (CT_RPr §17.3.2.28, CT_PPr §17.3.1.26, CT_NumPr §17.3.1.19, CT_TrPr §17.4.82, + * CT_TcPr §17.4.70, CT_TblPr §17.4.60, CT_TblPrEx §17.4.61, CT_SectPr §17.6.17). + * + * @type {Readonly>} + */ +export const OOXML_PROPERTY_CHILD_ORDER = Object.freeze({ + rPr: [ + 'rStyle', + 'rFonts', + 'b', + 'bCs', + 'i', + 'iCs', + 'caps', + 'smallCaps', + 'strike', + 'dstrike', + 'outline', + 'shadow', + 'emboss', + 'imprint', + 'noProof', + 'snapToGrid', + 'vanish', + 'webHidden', + 'color', + 'spacing', + 'w', + 'kern', + 'position', + 'sz', + 'szCs', + 'highlight', + 'u', + 'effect', + 'bdr', + 'shd', + 'fitText', + 'vertAlign', + 'rtl', + 'cs', + 'em', + 'lang', + 'eastAsianLayout', + 'specVanish', + 'oMath', + 'rPrChange', + ], + pPr: [ + 'pStyle', + 'keepNext', + 'keepLines', + 'pageBreakBefore', + 'framePr', + 'widowControl', + 'numPr', + 'suppressLineNumbers', + 'pBdr', + 'shd', + 'tabs', + 'suppressAutoHyphens', + 'kinsoku', + 'wordWrap', + 'overflowPunct', + 'topLinePunct', + 'autoSpaceDE', + 'autoSpaceDN', + 'bidi', + 'adjustRightInd', + 'snapToGrid', + 'spacing', + 'ind', + 'contextualSpacing', + 'mirrorIndents', + 'suppressOverlap', + 'jc', + 'textDirection', + 'textAlignment', + 'textboxTightWrap', + 'outlineLvl', + 'divId', + 'cnfStyle', + 'rPr', + 'sectPr', + 'pPrChange', + ], + numPr: ['ilvl', 'numId', 'numberingChange', 'ins'], + trPr: [ + 'cnfStyle', + 'divId', + 'gridBefore', + 'gridAfter', + 'wBefore', + 'wAfter', + 'cantSplit', + 'trHeight', + 'tblHeader', + 'tblCellSpacing', + 'jc', + 'hidden', + 'ins', + 'del', + 'trPrChange', + ], + tcPr: [ + 'cnfStyle', + 'tcW', + 'gridSpan', + 'hMerge', + 'vMerge', + 'tcBorders', + 'shd', + 'noWrap', + 'tcMar', + 'textDirection', + 'tcFitText', + 'vAlign', + 'hideMark', + 'headers', + 'cellIns', + 'cellDel', + 'cellMerge', + 'tcPrChange', + ], + tblPr: [ + 'tblStyle', + 'tblpPr', + 'tblOverlap', + 'bidiVisual', + 'tblStyleRowBandSize', + 'tblStyleColBandSize', + 'tblW', + 'jc', + 'tblCellSpacing', + 'tblInd', + 'tblBorders', + 'shd', + 'tblLayout', + 'tblCellMar', + 'tblLook', + 'tblCaption', + 'tblDescription', + 'tblPrChange', + ], + tblPrEx: ['tblW', 'jc', 'tblCellSpacing', 'tblInd', 'tblBorders', 'shd', 'tblLayout', 'tblCellMar', 'tblLook'], + sectPr: [ + 'headerReference', + 'footerReference', + 'footnotePr', + 'endnotePr', + 'type', + 'pgSz', + 'pgMar', + 'paperSrc', + 'pgBorders', + 'lnNumType', + 'pgNumType', + 'cols', + 'formProt', + 'vAlign', + 'noEndnote', + 'titlePg', + 'textDirection', + 'bidi', + 'rtlGutter', + 'docGrid', + 'printerSettings', + 'sectPrChange', + ], +}); + +/** + * Strips an OOXML namespace prefix from an element name (e.g. `w:rFonts` → `rFonts`). + * @param {string|undefined|null} name + * @returns {string|undefined|null} + */ +const stripPrefix = (name) => + typeof name === 'string' && name.includes(':') ? name.slice(name.indexOf(':') + 1) : name; + +/** + * Returns the canonical child sequence for a property container, or undefined + * if the container is not a known fixed-sequence type. + * @param {string} containerName Container element name, with or without prefix (e.g. `w:rPr` or `rPr`). + * @returns {string[]|undefined} + */ +export function getCanonicalChildOrder(containerName) { + const key = stripPrefix(containerName); + return key ? OOXML_PROPERTY_CHILD_ORDER[key] : undefined; +} + +/** + * Stable-sorts the child elements of a fixed-sequence OOXML property container + * into ECMA-376 canonical order. + * + * Known children are ordered by their position in the canonical sequence. + * Unknown/extension elements (e.g. `w14:*`, `mc:AlternateContent`) sort AFTER + * all known children, preserving their original relative order. If the + * container has no canonical sequence, the input is returned unchanged. + * + * @template {{ name?: string }} T + * @param {string} containerName The container element name (e.g. `w:rPr`). + * @param {T[]} elements The emitted child elements. + * @returns {T[]} The elements ordered into canonical schema sequence. + */ +export function sortPropertyChildElements(containerName, elements) { + if (!Array.isArray(elements) || elements.length < 2) return elements; + const order = getCanonicalChildOrder(containerName); + if (!order) return elements; + + const rankByName = new Map(order.map((name, index) => [name, index])); + const UNKNOWN_RANK = Number.MAX_SAFE_INTEGER; + + return elements + .map((element, index) => { + const rank = rankByName.get(stripPrefix(element?.name)); + return { element, index, rank: rank == null ? UNKNOWN_RANK : rank }; + }) + .sort((a, b) => a.rank - b.rank || a.index - b.index) + .map((entry) => entry.element); +} diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/ooxml-property-order.test.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/ooxml-property-order.test.js new file mode 100644 index 0000000000..2655917f67 --- /dev/null +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/ooxml-property-order.test.js @@ -0,0 +1,122 @@ +import { describe, it, expect, vi } from 'vitest'; +import { sortPropertyChildElements, getCanonicalChildOrder } from './ooxml-property-order.js'; +import { translator as numPrTranslator } from './w/numPr/numPr-translator.js'; +import { translator as trPrTranslator } from './w/trPr/trPr-translator.js'; +import { translator as rPrTranslator } from './w/rpr/rpr-translator.js'; + +// Some property translators transitively import the exporter; stub it to keep +// this unit suite isolated (mirrors numPr-translator.test.js). +vi.mock('@converter/exporter', () => ({ + exportSchemaToJson: vi.fn(), + createTrackStyleMark: vi.fn(), +})); + +const names = (node) => node.elements.map((el) => el.name); + +describe('sortPropertyChildElements', () => { + it('orders w:rPr children by canonical CT_RPr sequence (e.g. rFonts before color)', () => { + const elements = [{ name: 'w:color' }, { name: 'w:sz' }, { name: 'w:rFonts' }, { name: 'w:b' }]; + expect(sortPropertyChildElements('w:rPr', elements).map((e) => e.name)).toEqual([ + 'w:rFonts', + 'w:b', + 'w:color', + 'w:sz', + ]); + }); + + it('orders w:pPr children by canonical CT_PPr sequence', () => { + const elements = [{ name: 'w:jc' }, { name: 'w:spacing' }, { name: 'w:numPr' }, { name: 'w:pStyle' }]; + expect(sortPropertyChildElements('w:pPr', elements).map((e) => e.name)).toEqual([ + 'w:pStyle', + 'w:numPr', + 'w:spacing', + 'w:jc', + ]); + }); + + it('orders w:numPr children so w:ilvl precedes w:numId', () => { + const elements = [{ name: 'w:numId' }, { name: 'w:ilvl' }]; + expect(sortPropertyChildElements('w:numPr', elements).map((e) => e.name)).toEqual(['w:ilvl', 'w:numId']); + }); + + it('orders w:trPr children so w:trHeight precedes w:hidden', () => { + const elements = [{ name: 'w:hidden' }, { name: 'w:trHeight' }, { name: 'w:cnfStyle' }]; + expect(sortPropertyChildElements('w:trPr', elements).map((e) => e.name)).toEqual([ + 'w:cnfStyle', + 'w:trHeight', + 'w:hidden', + ]); + }); + + it('orders w:tcPr children by canonical CT_TcPr sequence', () => { + const elements = [{ name: 'w:vAlign' }, { name: 'w:tcW' }, { name: 'w:gridSpan' }]; + expect(sortPropertyChildElements('w:tcPr', elements).map((e) => e.name)).toEqual([ + 'w:tcW', + 'w:gridSpan', + 'w:vAlign', + ]); + }); + + it('orders w:tblPr children so w:tblCellMar precedes w:tblLook', () => { + const elements = [{ name: 'w:tblLook' }, { name: 'w:tblCellMar' }, { name: 'w:tblStyle' }]; + expect(sortPropertyChildElements('w:tblPr', elements).map((e) => e.name)).toEqual([ + 'w:tblStyle', + 'w:tblCellMar', + 'w:tblLook', + ]); + }); + + it('places unknown/extension elements after known children, preserving their relative order (stable)', () => { + const elements = [ + { name: 'w14:ligatures' }, + { name: 'w:color' }, + { name: 'mc:AlternateContent' }, + { name: 'w:rFonts' }, + ]; + expect(sortPropertyChildElements('w:rPr', elements).map((e) => e.name)).toEqual([ + 'w:rFonts', + 'w:color', + 'w14:ligatures', + 'mc:AlternateContent', + ]); + }); + + it('returns the input unchanged for containers without a canonical sequence', () => { + const elements = [{ name: 'w:bottom' }, { name: 'w:top' }]; + expect(sortPropertyChildElements('w:tcMar', elements)).toBe(elements); + }); + + it('returns the input unchanged when there are fewer than two elements', () => { + const single = [{ name: 'w:numId' }]; + expect(sortPropertyChildElements('w:numPr', single)).toBe(single); + }); + + it('exposes canonical sequences via getCanonicalChildOrder (prefix-insensitive)', () => { + expect(getCanonicalChildOrder('w:numPr')).toEqual(['ilvl', 'numId', 'numberingChange', 'ins']); + expect(getCanonicalChildOrder('numPr')).toEqual(['ilvl', 'numId', 'numberingChange', 'ins']); + expect(getCanonicalChildOrder('w:notAContainer')).toBeUndefined(); + }); +}); + +describe('property container translators emit children in canonical order', () => { + it('w:numPr emits w:ilvl before w:numId even when attrs are set numId-first', () => { + const result = numPrTranslator.decode({ + node: { attrs: { numberingProperties: { numId: 7, ilvl: 2 } } }, + }); + expect(names(result)).toEqual(['w:ilvl', 'w:numId']); + }); + + it('w:trPr emits w:trHeight before w:hidden even when attrs are set hidden-first', () => { + const result = trPrTranslator.decode({ + node: { attrs: { tableRowProperties: { hidden: true, rowHeight: { value: 240, rule: 'atLeast' } } } }, + }); + expect(names(result)).toEqual(['w:trHeight', 'w:hidden']); + }); + + it('w:rPr emits w:rFonts before w:color even when attrs are set color-first', () => { + const result = rPrTranslator.decode({ + node: { attrs: { runProperties: { color: { val: 'FF0000' }, fontFamily: { ascii: 'Arial' } } } }, + }); + expect(names(result)).toEqual(['w:rFonts', 'w:color']); + }); +}); diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/utils.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/utils.js index 0da20bf8d3..acf21bbf60 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/utils.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/utils.js @@ -1,6 +1,7 @@ import { NodeTranslator } from '../node-translator/index.js'; import { ST_ON_OFF_ON_VALUES, ST_ON_OFF_OFF_VALUES } from '@superdoc/document-api'; import { pushDiagnostic } from './import-diagnostics.js'; +import { sortPropertyChildElements } from './ooxml-property-order.js'; /** * Generates a handler entity for a given node translator. @@ -550,7 +551,10 @@ export function createNestedPropertiesTranslator( name: xmlName, type: 'element', attributes: decodedAttrs, - elements: elements, + // ECMA-376 models these containers as xsd:sequence; children must be + // emitted in canonical schema order or Word for the web rejects the + // package as corrupt. PM attribute insertion order is not canonical. + elements: sortPropertyChildElements(xmlName, elements), }; return newNode;