Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 24 additions & 16 deletions .github/workflows/haskell.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,10 @@ jobs:
- 9.4.8+macOS
- 9.6.5
- 9.8.4
- 9.10.2+ucd2haskell
- 9.12.2
- 9.14.1-alpha
- 9.10.3+ucd2haskell
- 9.12.4
- 9.14.1
# - 9.16.1-alpha
- hlint
include:

Expand Down Expand Up @@ -135,32 +136,39 @@ jobs:
runner: ubuntu-latest
ignore_error: false

- name: 9.10.2+ucd2haskell
ghc_version: 9.10.2
- name: 9.10.3+ucd2haskell
ghc_version: 9.10.3
cabal_version: 3.12.1.0
pack_options: CABAL_BUILD_OPTIONS="-f ucd2haskell"
runner: ubuntu-latest
ignore_error: false

- name: 9.12.2
ghc_version: 9.12.2
- name: 9.12.4
ghc_version: 9.12.4
cabal_version: 3.14.2.0
runner: ubuntu-latest
ignore_error: false

- name: 9.14.1-alpha
ghc_version: head
cabal_version: 3.16.0.0
pack_options: >
GHCUP_GHC_OPTIONS="-u https://downloads.haskell.org/ghc/9.14.1-alpha1/ghc-9.14.0.20250819-x86_64-deb12-linux.tar.xz"
DISABLE_TEST=y
CABAL_BUILD_OPTIONS="--allow-newer=base,template-haskell"
- name: 9.14.1
ghc_version: 9.14.1
cabal_version: 3.16.1.0
runner: ubuntu-latest
ignore_error: false

# TODO: update to 9.16.1-alpha when available
# - name: 9.16.1-alpha
# ghc_version: head
# cabal_version: 3.16.1.0
# pack_options: >
# GHCUP_GHC_OPTIONS="-u https://downloads.haskell.org/ghc/9.16.1-alpha1/ghc-9.16.0.XXXXX-x86_64-deb12-linux.tar.xz"
# DISABLE_TEST=y
# CABAL_BUILD_OPTIONS="--allow-newer=base,template-haskell"
# runner: ubuntu-latest
# ignore_error: false

# Template for release candidates
# - name: 9.10.0
# ghc_version: 9.8.0.XXXXX
# - name: 9.16.0
# ghc_version: 9.16.0.XXXXX
# ghcup_release_channel: "https://raw.githubusercontent.com/haskell/ghcup-metadata/master/ghcup-prereleases-0.0.7.yaml"
# runner: ubuntu-latest
# cabal_version: latest
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ This repository provides packages to use the

The Haskell data structures are generated programmatically from the UCD files.
The latest Unicode version supported by these libraries is
[`17.0.0`](https://www.unicode.org/versions/Unicode17.0.0/).
[`18.0.0`](https://www.unicode.org/versions/Unicode18.0.0/).

### `unicode-data`

Expand Down
6 changes: 3 additions & 3 deletions experimental/icu/icu.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ copyright: 2023 Composewell Technologies and Contributors
category: Data,Text,Unicode
stability: Experimental
build-type: Simple
extra-source-files:
cbits/icu.c
cbits/icu.h

source-repository head
type: git
Expand Down Expand Up @@ -48,9 +51,6 @@ common compile-options
-fwarn-tabs
default-language: Haskell2010

extra-source-files:
cbits/icu.h

library
import: default-extensions, compile-options
if flag(has-icu)
Expand Down
11 changes: 6 additions & 5 deletions experimental/unicode-data-text/unicode-data-text.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@ tested-with: GHC==8.2.2
, GHC==9.4.8
, GHC==9.6.5
, GHC==9.8.4
, GHC==9.10.2
, GHC==9.12.2
, GHC==9.10.3
, GHC==9.12.4
, GHC==9.14.1

extra-doc-files:
Changelog.md
Expand Down Expand Up @@ -68,7 +69,7 @@ library
build-depends:
base >= 4.7 && < 4.23,
text >= 1.2.4 && < 2.2,
unicode-data >= 0.8 && < 0.9
unicode-data >= 0.9 && < 0.10

test-suite test
import: default-extensions, compile-options
Expand All @@ -82,7 +83,7 @@ test-suite test
base >= 4.7 && < 4.23,
hspec >= 2.0 && < 2.12,
text >= 1.2.4 && < 2.2,
unicode-data >= 0.8 && < 0.9,
unicode-data >= 0.9 && < 0.10,
unicode-data-text
build-tool-depends:
hspec-discover:hspec-discover >= 2.0 && < 2.12
Expand All @@ -98,7 +99,7 @@ benchmark bench
tasty-bench >= 0.2.5 && < 0.6,
tasty >= 1.4.1 && < 1.6,
text >= 1.2.4 && < 2.2,
unicode-data >= 0.8 && < 0.9,
unicode-data >= 0.9 && < 0.10,
unicode-data-text
-- [NOTE] Recommendation of tasty-bench to reduce garbage collection noisiness
ghc-options: -O2 -fdicts-strict -rtsopts -with-rtsopts=-A32m
Expand Down
38 changes: 19 additions & 19 deletions ucd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# we used to generate them earlier are exactly the same as the ones we are
# downloading. To ensure that, verification of the checksum is necessary.

VERSION=17.0.0
VERSION=18.0.0

# When downloading a fresh new version, comment this out
VERIFY_CHECKSUM=y
Expand All @@ -16,31 +16,31 @@ UCD_URL="https://www.unicode.org/Public/$VERSION/ucd"
# $ find data/$VERSION/ -type f -print0 | xargs -0 sha256sum
# Format: filename:checksum
UCD_FILES="\
Blocks.txt:c0edefaf1a19771e830a82735472716af6bf3c3975f6c2a23ffbe2580fbbcb15\
CaseFolding.txt:ff8d8fefbf123574205085d6714c36149eb946d717a0c585c27f0f4ef58c4183\
DerivedCoreProperties.txt:24c7fed1195c482faaefd5c1e7eb821c5ee1fb6de07ecdbaa64b56a99da22c08\
DerivedNormalizationProps.txt:71fd6a206a2c0cdd41feb6b7f656aa31091db45e9cedc926985d718397f9e488\
NameAliases.txt:793f6f1e4d15fd90f05ae66460191dc4d75d1fea90136a25f30dd6a4cb950eac\
PropertyValueAliases.txt:64e9a5f76f7a1e8b5a47d6a1f9a26522a251208f5276bdfa1559dac7cf2e827a\
PropList.txt:130dcddcaadaf071008bdfce1e7743e04fdfbc910886f017d9f9ac931d8c64dd\
Scripts.txt:9f5e50d3abaee7d6ce09480f325c706f485ae3240912527e651954d2d6b035bf\
ScriptExtensions.txt:ec2107e58825a1586acee8e0911ce18260394ac8b87e535ca325f1ccbeb06bc6\
SpecialCasing.txt:efc25faf19de21b92c1194c111c932e03d2a5eaf18194e33f1156e96de4c9588\
UnicodeData.txt:2e1efc1dcb59c575eedf5ccae60f95229f706ee6d031835247d843c11d96470c\
extracted/DerivedCombiningClass.txt:191463abfbd202703c6fd6776a92a23ac44ec65e0476a7f95aa91ca492cef29b\
extracted/DerivedName.txt:019758bbe6c756c40fca6d505187ea660c5e195533e2ff2c841963a212c9d369\
extracted/DerivedNumericValues.txt:139b976bdc288be01c80f018523da769cf2845109b5a7f0f8a432db64bfedcfa"
Blocks.txt:2ba23579ee38e0b62b00a1a448f094eb54a823fb96109da685ad261b9bd493f2\
CaseFolding.txt:a004797658a457bec4dc11683e39f69249ea3b595b752dbea6721c4c9f587b0d\
DerivedCoreProperties.txt:b040c9b05cd49ca9ecced4066e667abf7513488fa726cc01bbd54948319bdea6\
DerivedNormalizationProps.txt:98ac7f67d985fe781e317f6182e885e94cabb0c314769e6dd73e48b226931ccd\
NameAliases.txt:3d5cf5e468901b080cd99adf2230061b748083705fe633db43ac8b73ec7a13da\
PropertyValueAliases.txt:19a6c4ab639d09a42881507ff1a1055286768f24e9b96d1e810b354a759ec030\
PropList.txt:ff65a2e6a57992504dad1251281eb9de7f0b223f73c7af9108466b5769141721\
Scripts.txt:66b1d21a528a1eb8bbfd48e05c2c0808435e90d1787cf5a69752dc88f6e941a0\
ScriptExtensions.txt:a5dfefc6660af2e34f47269915ca321f58574aa276de2acea9760cd94af96968\
SpecialCasing.txt:8538dea57c184f1ef3783885ea79677b10f6efa06423717157e63712f14d1ad2\
UnicodeData.txt:3a30a86cb25ccd95a63baffeabce026ccf59a19d2f7e705acc0a44c1326a5764\
extracted/DerivedCombiningClass.txt:10048c71ff6860cb707ee21cb7d401a1e50b1169e8e5613018ebca616ed8f5b1\
extracted/DerivedName.txt:fa05755dfa75395823ce0d8cc0508f645b6bbcf8f59a9468b80586d458b3cd5f\
extracted/DerivedNumericValues.txt:94e6c8a73e460c4196ab198aec2219cf3bbfd5aae6595f5d048311d41d20c9fb"

# Security files:
# - < 17.0.0: https://www.unicode.org/Public/security/$VERSION/$file
# - ≥ 17.0.0: https://www.unicode.org/Public/$VERSION/security/$file
SECURITY_URL="https://www.unicode.org/Public/$VERSION/security"
# Format: filename:checksum
SECURITY_FILES="\
IdentifierStatus.txt:617228a16da13850bf8af28b6cd08f5e9b6595d2eb60404fe6eee2c85b4e4a35\
IdentifierType.txt:924ac63faa97ed73420d6ac48d08279d90968c7da0502ab701e08bfbb9683c22\
confusables.txt:091c7f82fc39ef208faf8f94d29c244de99254675e09de163160c810d13ef22a\
intentional.txt:33738217c15c1a0df0b7a2cc0a0b50b27ebdca119ca11253440ec0102f05626b"
IdentifierStatus.txt:5863c7d99ca18f213c41c7318aa5528bebfb6d32ec0f1d5944e37192c119aebd\
IdentifierType.txt:16cd9c65392945904890bb2b64c686397438bcf29b1a47a0fc478a24c17d433a\
confusables.txt:fa913a52e5ee1106631b0c5489198e36970596fa2d2b3a36e9ed5dfb8f66e4c6\
intentional.txt:5b69cdfd7be6be45d51b9cf7ec799df91c1acc47c557d66c92a8d6623df78b0e"

# Download the files

Expand Down
29 changes: 27 additions & 2 deletions ucd2haskell/exe/UCD2Haskell/Modules/UnicodeData/DerivedNames.hs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ genNamesModule moduleName = Fold step initial done
egyptianHieroglyph = 0xf3
khitan = 0xf4
nushu = 0xf5
jurchen = 0xf6
seal = 0xf7
hangul = 0x80

rangePrefixes =
Expand All @@ -107,7 +109,9 @@ genNamesModule moduleName = Fold step initial done
, "TANGUT IDEOGRAPH-"
, "EGYPTIAN HIEROGLYPH-"
, "KHITAN SMALL SCRIPT CHARACTER-"
, "NUSHU CHARACTER-" ]
, "NUSHU CHARACTER-"
, "JURCHEN CHARACTER-"
, "SEAL CHARACTER-" ]

encodeName name
| BS.take 28 name == "CJK COMPATIBILITY IDEOGRAPH-" = ("", cjkCompat, 0, True)
Expand All @@ -116,13 +120,14 @@ genNamesModule moduleName = Fold step initial done
| BS.take 20 name == "EGYPTIAN HIEROGLYPH-" = ("", egyptianHieroglyph, 0, True)
| BS.take 30 name == "KHITAN SMALL SCRIPT CHARACTER-" = ("", khitan, 0, True)
| BS.take 16 name == "NUSHU CHARACTER-" = ("", nushu, 0, True)
| BS.take 18 name == "JURCHEN CHARACTER-" = ("", jurchen, 0, True)
| BS.take 15 name == "SEAL CHARACTER-" = ("", seal, 0, True)
| BS.take 16 name == "HANGUL SYLLABLE " =
let !name' = BS.drop 16 name; !len = BS.length name'
in if len <= 12
then (name', hangul + len, len, True)
else error ("genNamesModule: cannot encode Hangul: " <> show len)
| otherwise = let !len = BS.length name in (name, len, len, False)

encodeOffset offset len = encode32LE offset' mempty
where !offset' = len .|. (offset `shiftL` 8)
encode32LE v acc
Expand All @@ -146,6 +151,8 @@ genNamesModule moduleName = Fold step initial done
, " , pattern EgyptianHieroglyph"
, " , pattern KhitanSmallScript"
, " , pattern NushuCharacter"
, " , pattern JurchenCharacter"
, " , pattern SealCharacter"
, " , pattern HangulSyllable"
, " ) where"
, ""
Expand Down Expand Up @@ -200,6 +207,18 @@ genNamesModule moduleName = Fold step initial done
, "pattern NushuCharacter :: Int#"
, "pattern NushuCharacter = 0x" <> intHex nushu <> "#"
, ""
, "-- | Jurchen Character. Used to test the length returned by 'name'."
, "--"
, "-- @since 0.8.0"
, "pattern JurchenCharacter :: Int#"
, "pattern JurchenCharacter = 0x" <> intHex jurchen <> "#"
, ""
, "-- | Seal Character. Used to test the length returned by 'name'."
, "--"
, "-- @since 0.8.0"
, "pattern SealCharacter :: Int#"
, "pattern SealCharacter = 0x" <> intHex seal <> "#"
, ""
, "-- | Hangul syllable. Used to test the length returned by 'name'."
, "--"
, "-- @since 0.3.0"
Expand Down Expand Up @@ -230,6 +249,12 @@ genNamesModule moduleName = Fold step initial done
, "-- * If length = @'NushuCharacter'@,"
, "-- then the name is generated from the pattern “NUSHU CHARACTER-*”,"
, "-- where * is the hexadecimal codepoint."
, "-- * If length = @'JurchenCharacter'@,"
, "-- then the name is generated from the pattern “JURCHEN CHARACTER-*”,"
, "-- where * is the hexadecimal codepoint."
, "-- * If length = @'SealCharacter'@,"
, "-- then the name is generated from the pattern “SEAL CHARACTER-*”,"
, "-- where * is the hexadecimal codepoint."
, "-- * If length ≥ @'HangulSyllable'@,"
, "-- then the name is generated by prepending “HANGUL SYLLABLE ”"
, "-- to the returned string."
Expand Down
7 changes: 4 additions & 3 deletions ucd2haskell/ucd2haskell.cabal
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cabal-version: 2.2
name: ucd2haskell
version: 0.7.0
version: 0.8.0
synopsis: Generate the code of the unicode-data* packages
description:
Internal package for the generation of @unicode-data*@ packages.
Expand All @@ -18,8 +18,9 @@ tested-with: GHC==9.2.8
, GHC==9.4.8
, GHC==9.6.5
, GHC==9.8.4
, GHC==9.10.2
, GHC==9.12.2
, GHC==9.10.3
, GHC==9.12.4
, GHC==9.14.1

extra-doc-files:
README.md
Expand Down
4 changes: 4 additions & 0 deletions unicode-data-names/Changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## 0.7.0 (September 2026)

- Updated to [Unicode 18.0.0](https://www.unicode.org/versions/Unicode18.0.0/).

## 0.6.0 (September 2025)

- Updated to [Unicode 17.0.0](https://www.unicode.org/versions/Unicode17.0.0/).
Expand Down
2 changes: 1 addition & 1 deletion unicode-data-names/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ There are 3 APIs:
The Haskell data structures are generated programmatically from the
Unicode character database (UCD) files. The latest Unicode version
supported by this library is
[`17.0.0`](https://www.unicode.org/versions/Unicode17.0.0/).
[`18.0.0`](https://www.unicode.org/versions/Unicode18.0.0/).

Please see the
[Haddock documentation](https://hackage.haskell.org/package/unicode-data-names)
Expand Down
8 changes: 8 additions & 0 deletions unicode-data-names/lib/Unicode/Char/General/Names.hs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ name (C# c#) = case DerivedName.name c# of
where
!hex = showHex c#
!n = 'N':'U':'S':'H':'U':' ':'C':'H':'A':'R':'A':'C':'T':'E':'R':'-':hex
DerivedName.JurchenCharacter -> Just n
where
!hex = showHex c#
!n = 'J':'U':'R':'C':'H':'E':'N':' ':'C':'H':'A':'R':'A':'C':'T':'E':'R':'-':hex
DerivedName.SealCharacter -> Just n
where
!hex = showHex c#
!n = 'S':'E':'A':'L':' ':'C':'H':'A':'R':'A':'C':'T':'E':'R':'-':hex
_
| isTrue# (len# <# DerivedName.HangulSyllable) -> let !n = unpack name# [] len# in Just n
| otherwise ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ name (C# c#) = case DerivedName.name c# of
DerivedName.NushuCharacter -> Just n
where
!n = mkNameFromTemplate# "NUSHU CHARACTER-"# 16# (ord# c#)
DerivedName.JurchenCharacter -> Just n
where
!n = mkNameFromTemplate# "JURCHEN CHARACTER-"# 18# (ord# c#)
DerivedName.SealCharacter -> Just n
where
!n = mkNameFromTemplate# "SEAL CHARACTER-"# 15# (ord# c#)
_
| isTrue# (len# <# DerivedName.HangulSyllable) ->
let !n = unpackAddr# name# len#
Expand Down
6 changes: 6 additions & 0 deletions unicode-data-names/lib/Unicode/Char/General/Names/Text.hs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ name (C# c#) = case DerivedName.name c# of
DerivedName.NushuCharacter -> Just n
where
!n = mkNameFromTemplate "NUSHU CHARACTER-"# 16# (ord# c#)
DerivedName.JurchenCharacter -> Just n
where
!n = mkNameFromTemplate "JURCHEN CHARACTER-"# 18# (ord# c#)
DerivedName.SealCharacter -> Just n
where
!n = mkNameFromTemplate "SEAL CHARACTER-"# 15# (ord# c#)
_
| isTrue# (len# <# DerivedName.HangulSyllable) ->
let !n = unpackAddr# name# len#
Expand Down
4 changes: 2 additions & 2 deletions unicode-data-names/lib/Unicode/Internal/Char/Names/Version.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ module Unicode.Internal.Char.Names.Version (unicodeVersion) where
import Data.Version (Version, makeVersion)

-- | Version of the Unicode standard used by this package:
-- [17.0.0](https://www.unicode.org/versions/Unicode17.0.0/).
-- [18.0.0](https://www.unicode.org/versions/Unicode18.0.0/).
--
-- @since 0.3.0
unicodeVersion :: Version
unicodeVersion = makeVersion [17,0,0]
unicodeVersion = makeVersion [18,0,0]
Loading
Loading