diff --git a/contrib/btree_gist/btree_utils_var.c b/contrib/btree_gist/btree_utils_var.c
index 0c0e952f736..63137c5c85c 100644
--- a/contrib/btree_gist/btree_utils_var.c
+++ b/contrib/btree_gist/btree_utils_var.c
@@ -116,36 +116,47 @@ gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo, FmgrInfo *flinfo)
 
 /*
  * returns the common prefix length of a node key
+ *
+ * If the underlying type is character data, the returned prefix length may
+ * fall in the middle of a multibyte character.
 */
 static int32
 gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
 {
 	GBT_VARKEY_R r = gbt_var_key_readable(node);
 	int32		i = 0;
-	int32		l = 0;
+	int32		l_left_to_match = 0;
+	int32		l_total = 0;
 	int32		t1len = VARSIZE(r.lower) - VARHDRSZ;
 	int32		t2len = VARSIZE(r.upper) - VARHDRSZ;
 	int32		ml = Min(t1len, t2len);
 	char	   *p1 = VARDATA(r.lower);
 	char	   *p2 = VARDATA(r.upper);
+	const char *end1 = p1 + t1len;
+	const char *end2 = p2 + t2len;
 
 	if (ml == 0)
 		return 0;
 
 	while (i < ml)
 	{
-		if (tinfo->eml > 1 && l == 0)
+		if (tinfo->eml > 1 && l_left_to_match == 0)
 		{
-			if ((l = pg_mblen(p1)) != pg_mblen(p2))
+			l_total = pg_mblen_range(p1, end1);
+			if (l_total != pg_mblen_range(p2, end2))
 			{
 				return i;
 			}
+			l_left_to_match = l_total;
 		}
 		if (*p1 != *p2)
 		{
 			if (tinfo->eml > 1)
 			{
-				return (i - l + 1);
+				int32		l_matched_subset = l_total - l_left_to_match;
+
+				/* end common prefix at final byte of last matching char */
+				return i - l_matched_subset;
 			}
 			else
 			{
@@ -155,7 +166,7 @@ gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
 
 		p1++;
 		p2++;
-		l--;
+		l_left_to_match--;
 		i++;
 	}
 	return ml;					/* lower == upper */
diff --git a/contrib/dict_xsyn/dict_xsyn.c b/contrib/dict_xsyn/dict_xsyn.c
index e538928aba4..19db56f0352 100644
--- a/contrib/dict_xsyn/dict_xsyn.c
+++ b/contrib/dict_xsyn/dict_xsyn.c
@@ -48,15 +48,15 @@ find_word(char *in, char **end)
 	char	   *start;
 
 	*end = NULL;
-	while (*in && t_isspace(in))
-		in += pg_mblen(in);
+	while (*in && t_isspace_cstr(in))
+		in += pg_mblen_cstr(in);
 
 	if (!*in || *in == '#')
 		return NULL;
 	start = in;
 
-	while (*in && !t_isspace(in))
-		in += pg_mblen(in);
+	while (*in && !t_isspace_cstr(in))
+		in += pg_mblen_cstr(in);
 
 	*end = in;
 
diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c
index 999ddad76d9..08c1b216aa2 100644
--- a/contrib/hstore/hstore_io.c
+++ b/contrib/hstore/hstore_io.c
@@ -64,7 +64,7 @@ prssyntaxerror(HSParser *state)
 	errsave(state->escontext,
 			(errcode(ERRCODE_SYNTAX_ERROR),
 			 errmsg("syntax error in hstore, near \"%.*s\" at position %d",
-					pg_mblen(state->ptr), state->ptr,
+					pg_mblen_cstr(state->ptr), state->ptr,
 					(int) (state->ptr - state->begin))));
 	/* In soft error situation, return false as convenience for caller */
 	return false;
diff --git a/contrib/intarray/_int_selfuncs.c b/contrib/intarray/_int_selfuncs.c
index d4793b0b638..015649ab334 100644
--- a/contrib/intarray/_int_selfuncs.c
+++ b/contrib/intarray/_int_selfuncs.c
@@ -19,6 +19,7 @@
 #include "catalog/pg_operator.h"
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
+#include "commands/extension.h"
 #include "miscadmin.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
@@ -171,7 +172,18 @@ _int_matchsel(PG_FUNCTION_ARGS)
 		PG_RETURN_FLOAT8(0.0);
 	}
 
-	/* The caller made sure the const is a query, so get it now */
+	/*
+	 * Verify that the Const is a query_int, else return a default estimate.
+	 * (This could only fail if someone attached this estimator to the wrong
+	 * operator.)
+	 */
+	if (((Const *) other)->consttype !=
+		get_function_sibling_type(fcinfo->flinfo->fn_oid, "query_int"))
+	{
+		ReleaseVariableStats(vardata);
+		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
+	}
+
 	query = DatumGetQueryTypeP(((Const *) other)->constvalue);
 
 	/* Empty query matches nothing */
diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index a6466f575fd..bbc1f9f5c72 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -27,14 +27,14 @@ getlexeme(char *start, char *end, int *len)
 	char	   *ptr;
 
 	while (start < end && t_iseq(start, '_'))
-		start += pg_mblen(start);
+		start += pg_mblen_range(start, end);
 
 	ptr = start;
 	if (ptr >= end)
 		return NULL;
 
 	while (ptr < end && !t_iseq(ptr, '_'))
-		ptr += pg_mblen(ptr);
+		ptr += pg_mblen_range(ptr, end);
 
 	*len = ptr - start;
 	return start;
diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
index 5e0761641d3..cf24add69f7 100644
--- a/contrib/ltree/ltree.h
+++ b/contrib/ltree/ltree.h
@@ -127,7 +127,7 @@ typedef struct
 #define LQUERY_HASNOT		0x01
 
 /* valid label chars are alphanumerics, underscores and hyphens */
-#define ISLABEL(x) ( t_isalnum(x) || t_iseq(x, '_') || t_iseq(x, '-') )
+#define ISLABEL(x) ( t_isalnum_cstr(x) || t_iseq(x, '_') || t_iseq(x, '-') )
 
 /* full text query */
 
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index 3a0a4266870..24d2bf67def 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -56,7 +56,7 @@ parse_ltree(const char *buf, struct Node *escontext)
 	ptr = buf;
 	while (*ptr)
 	{
-		charlen = pg_mblen(ptr);
+		charlen = pg_mblen_cstr(ptr);
 		if (t_iseq(ptr, '.'))
 			num++;
 		ptr += charlen;
@@ -71,7 +71,7 @@ parse_ltree(const char *buf, struct Node *escontext)
 	ptr = buf;
 	while (*ptr)
 	{
-		charlen = pg_mblen(ptr);
+		charlen = pg_mblen_cstr(ptr);
 
 		switch (state)
 		{
@@ -293,7 +293,7 @@ parse_lquery(const char *buf, struct Node *escontext)
 	ptr = buf;
 	while (*ptr)
 	{
-		charlen = pg_mblen(ptr);
+		charlen = pg_mblen_cstr(ptr);
 
 		if (t_iseq(ptr, '.'))
 			num++;
@@ -313,7 +313,7 @@ parse_lquery(const char *buf, struct Node *escontext)
 	ptr = buf;
 	while (*ptr)
 	{
-		charlen = pg_mblen(ptr);
+		charlen = pg_mblen_cstr(ptr);
 
 		switch (state)
 		{
@@ -418,7 +418,7 @@ parse_lquery(const char *buf, struct Node *escontext)
 			case LQPRS_WAITFNUM:
 				if (t_iseq(ptr, ','))
 					state = LQPRS_WAITSNUM;
-				else if (t_isdigit(ptr))
+				else if (t_isdigit_cstr(ptr))
 				{
 					int			low = atoi(ptr);
 
@@ -436,7 +436,7 @@ parse_lquery(const char *buf, struct Node *escontext)
 					UNCHAR;
 				break;
 			case LQPRS_WAITSNUM:
-				if (t_isdigit(ptr))
+				if (t_isdigit_cstr(ptr))
 				{
 					int			high = atoi(ptr);
 
@@ -467,7 +467,7 @@ parse_lquery(const char *buf, struct Node *escontext)
 			case LQPRS_WAITCLOSE:
 				if (t_iseq(ptr, '}'))
 					state = LQPRS_WAITEND;
-				else if (!t_isdigit(ptr))
+				else if (!t_isdigit_cstr(ptr))
 					UNCHAR;
 				break;
 			case LQPRS_WAITND:
@@ -478,7 +478,7 @@ parse_lquery(const char *buf, struct Node *escontext)
 				}
 				else if (t_iseq(ptr, ','))
 					state = LQPRS_WAITSNUM;
-				else if (!t_isdigit(ptr))
+				else if (!t_isdigit_cstr(ptr))
 					UNCHAR;
 				break;
 			case LQPRS_WAITEND:
diff --git a/contrib/ltree/ltxtquery_io.c b/contrib/ltree/ltxtquery_io.c
index 2c27ebd180f..0f2954f31ba 100644
--- a/contrib/ltree/ltxtquery_io.c
+++ b/contrib/ltree/ltxtquery_io.c
@@ -64,7 +64,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint
 
 	for (;;)
 	{
-		charlen = pg_mblen(state->buf);
+		charlen = pg_mblen_cstr(state->buf);
 
 		switch (state->state)
 		{
@@ -88,7 +88,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint
 					*lenval = charlen;
 					*flag = 0;
 				}
-				else if (!t_isspace(state->buf))
+				else if (!t_isspace_cstr(state->buf))
 					ereturn(state->escontext, ERR,
 							(errcode(ERRCODE_SYNTAX_ERROR),
 							 errmsg("operand syntax error")));
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
index e8f43b8bdbf..803472e0928 100644
--- a/contrib/pageinspect/heapfuncs.c
+++ b/contrib/pageinspect/heapfuncs.c
@@ -101,7 +101,7 @@ text_to_bits(char *str, int len)
 			ereport(ERROR,
 					(errcode(ERRCODE_DATA_CORRUPTED),
 					 errmsg("invalid character \"%.*s\" in t_bits string",
-							pg_mblen(str + off), str + off)));
+							pg_mblen_cstr(str + off), str + off)));
 
 		if (off % 8 == 7)
 			bits[off / 8] = byte;
diff --git a/contrib/pg_trgm/Makefile b/contrib/pg_trgm/Makefile
index f8ecb34a2d2..faae60f8869 100644
--- a/contrib/pg_trgm/Makefile
+++ b/contrib/pg_trgm/Makefile
@@ -14,7 +14,7 @@ DATA = pg_trgm--1.5--1.6.sql pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
 	pg_trgm--1.0--1.1.sql
 PGFILEDESC = "pg_trgm - trigram matching"
 
-REGRESS = pg_trgm pg_word_trgm pg_strict_word_trgm
+REGRESS = pg_trgm pg_utf8_trgm pg_word_trgm pg_strict_word_trgm
 REGRESS_OPTS += --init-file=$(top_srcdir)/src/test/regress/init_file
 
 ifdef USE_PGXS
diff --git a/contrib/pg_trgm/data/trgm_utf8.data b/contrib/pg_trgm/data/trgm_utf8.data
new file mode 100644
index 00000000000..713856e76a6
--- /dev/null
+++ b/contrib/pg_trgm/data/trgm_utf8.data
@@ -0,0 +1,50 @@
+Mathematics
+数学
+गणित
+Matemáticas
+رياضيات
+Mathématiques
+গণিত
+Matemática
+Математика
+ریاضی
+Matematika
+Mathematik
+数学
+Mathematics
+गणित
+గణితం
+Matematik
+கணிதம்
+數學
+Toán học
+Matematika
+数学
+수학
+ریاضی
+Lissafi
+Hisabati
+Matematika
+Matematica
+ریاضی
+ಗಣಿತ
+ગણિત
+คณิตศาสตร์
+ሂሳብ
+गणित
+ਗਣਿਤ
+數學
+数学
+Iṣiro
+數學
+သင်္ချာ
+Herrega
+رياضي
+गणित
+Математика
+Matematyka
+ഗണിതം
+Matematika
+رياضي
+Matematika
+Matematică
diff --git a/contrib/pg_trgm/expected/pg_utf8_trgm.out b/contrib/pg_trgm/expected/pg_utf8_trgm.out
new file mode 100644
index 00000000000..0768e7d6a83
--- /dev/null
+++ b/contrib/pg_trgm/expected/pg_utf8_trgm.out
@@ -0,0 +1,8 @@
+SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+-- Index 50 translations of the word "Mathematics"
+CREATE TEMP TABLE mb (s text);
+\copy mb from 'data/trgm_utf8.data'
+CREATE INDEX ON mb USING gist(s gist_trgm_ops);
diff --git a/contrib/pg_trgm/expected/pg_utf8_trgm_1.out b/contrib/pg_trgm/expected/pg_utf8_trgm_1.out
new file mode 100644
index 00000000000..8505c4fa552
--- /dev/null
+++ b/contrib/pg_trgm/expected/pg_utf8_trgm_1.out
@@ -0,0 +1,3 @@
+SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
+\if :skip_test
+\quit
diff --git a/contrib/pg_trgm/meson.build b/contrib/pg_trgm/meson.build
index 093ac18400c..bd3a34f2557 100644
--- a/contrib/pg_trgm/meson.build
+++ b/contrib/pg_trgm/meson.build
@@ -39,6 +39,7 @@ tests += {
   'regress': {
     'sql': [
       'pg_trgm',
+      'pg_utf8_trgm',
       'pg_word_trgm',
       'pg_strict_word_trgm',
     ],
diff --git a/contrib/pg_trgm/sql/pg_utf8_trgm.sql b/contrib/pg_trgm/sql/pg_utf8_trgm.sql
new file mode 100644
index 00000000000..0dd962ced83
--- /dev/null
+++ b/contrib/pg_trgm/sql/pg_utf8_trgm.sql
@@ -0,0 +1,9 @@
+SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+
+-- Index 50 translations of the word "Mathematics"
+CREATE TEMP TABLE mb (s text);
+\copy mb from 'data/trgm_utf8.data'
+CREATE INDEX ON mb USING gist(s gist_trgm_ops);
diff --git a/contrib/pg_trgm/trgm.h b/contrib/pg_trgm/trgm.h
index afb0adb222b..1bd41fa1901 100644
--- a/contrib/pg_trgm/trgm.h
+++ b/contrib/pg_trgm/trgm.h
@@ -52,10 +52,10 @@ typedef char trgm[3];
 } while(0)
 
 #ifdef KEEPONLYALNUM
-#define ISWORDCHR(c)	(t_isalnum(c))
+#define ISWORDCHR(c, len)	(t_isalnum_with_len(c, len))
 #define ISPRINTABLECHAR(a)	( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )
 #else
-#define ISWORDCHR(c)	(!t_isspace(c))
+#define ISWORDCHR(c, len)	(!t_isspace_with_len(c, len))
 #define ISPRINTABLECHAR(a)	( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) )
 #endif
 #define ISPRINTABLETRGM(t)	( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )
diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c
index 49d4497b4f3..32c390257b3 100644
--- a/contrib/pg_trgm/trgm_op.c
+++ b/contrib/pg_trgm/trgm_op.c
@@ -173,18 +173,29 @@ static char *
 find_word(char *str, int lenstr, char **endword, int *charlen)
 {
 	char	   *beginword = str;
+	const char *endstr = str + lenstr;
 
-	while (beginword - str < lenstr && !ISWORDCHR(beginword))
-		beginword += pg_mblen(beginword);
+	while (beginword < endstr)
+	{
+		int			clen = pg_mblen_range(beginword, endstr);
 
-	if (beginword - str >= lenstr)
+		if (ISWORDCHR(beginword, clen))
+			break;
+		beginword += clen;
+	}
+
+	if (beginword >= endstr)
 		return NULL;
 
 	*endword = beginword;
 	*charlen = 0;
-	while (*endword - str < lenstr && ISWORDCHR(*endword))
+	while (*endword < endstr)
 	{
-		*endword += pg_mblen(*endword);
+		int			clen = pg_mblen_range(*endword, endstr);
+
+		if (!ISWORDCHR(*endword, clen))
+			break;
+		*endword += clen;
 		(*charlen)++;
 	}
 
@@ -232,9 +243,9 @@ make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
 	if (bytelen > charlen)
 	{
 		/* Find multibyte character boundaries and apply compact_trigram */
-		int			lenfirst = pg_mblen(str),
-					lenmiddle = pg_mblen(str + lenfirst),
-					lenlast = pg_mblen(str + lenfirst + lenmiddle);
+		int			lenfirst = pg_mblen_unbounded(str),
+					lenmiddle = pg_mblen_unbounded(str + lenfirst),
+					lenlast = pg_mblen_unbounded(str + lenfirst + lenmiddle);
 
 		while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen)
 		{
@@ -245,7 +256,7 @@ make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
 
 			lenfirst = lenmiddle;
 			lenmiddle = lenlast;
-			lenlast = pg_mblen(ptr + lenfirst + lenmiddle);
+			lenlast = pg_mblen_unbounded(ptr + lenfirst + lenmiddle);
 		}
 	}
 	else
@@ -730,6 +741,7 @@ get_wildcard_part(const char *str, int lenstr,
 {
 	const char *beginword = str;
 	const char *endword;
+	const char *endstr = str + lenstr;
 	char	   *s = buf;
 	bool		in_leading_wildcard_meta = false;
 	bool		in_trailing_wildcard_meta = false;
@@ -742,11 +754,13 @@ get_wildcard_part(const char *str, int lenstr,
 	 * from this loop to the next one, since we may exit at a word character
 	 * that is in_escape.
 	 */
-	while (beginword - str < lenstr)
+	while (beginword < endstr)
 	{
+		clen = pg_mblen_range(beginword, endstr);
+
 		if (in_escape)
 		{
-			if (ISWORDCHR(beginword))
+			if (ISWORDCHR(beginword, clen))
 				break;
 			in_escape = false;
 			in_leading_wildcard_meta = false;
@@ -757,12 +771,12 @@ get_wildcard_part(const char *str, int lenstr,
 				in_escape = true;
 			else if (ISWILDCARDCHAR(beginword))
 				in_leading_wildcard_meta = true;
-			else if (ISWORDCHR(beginword))
+			else if (ISWORDCHR(beginword, clen))
 				break;
 			else
 				in_leading_wildcard_meta = false;
 		}
-		beginword += pg_mblen(beginword);
+		beginword += clen;
 	}
 
 	/*
@@ -795,12 +809,12 @@ get_wildcard_part(const char *str, int lenstr,
 	 * string boundary.  Strip escapes during copy.
 	 */
 	endword = beginword;
-	while (endword - str < lenstr)
+	while (endword < endstr)
 	{
-		clen = pg_mblen(endword);
+		clen = pg_mblen_range(endword, endstr);
 		if (in_escape)
 		{
-			if (ISWORDCHR(endword))
+			if (ISWORDCHR(endword, clen))
 			{
 				memcpy(s, endword, clen);
 				(*charlen)++;
@@ -828,7 +842,7 @@ get_wildcard_part(const char *str, int lenstr,
 				in_trailing_wildcard_meta = true;
 				break;
 			}
-			else if (ISWORDCHR(endword))
+			else if (ISWORDCHR(endword, clen))
 			{
 				memcpy(s, endword, clen);
 				(*charlen)++;
diff --git a/contrib/pg_trgm/trgm_regexp.c b/contrib/pg_trgm/trgm_regexp.c
index 1d369460671..6d797c72203 100644
--- a/contrib/pg_trgm/trgm_regexp.c
+++ b/contrib/pg_trgm/trgm_regexp.c
@@ -481,7 +481,7 @@ static TRGM *createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph,
 static void RE_compile(regex_t *regex, text *text_re,
 					   int cflags, Oid collation);
 static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA);
-static bool convertPgWchar(pg_wchar c, trgm_mb_char *result);
+static int	convertPgWchar(pg_wchar c, trgm_mb_char *result);
 static void transformGraph(TrgmNFA *trgmNFA);
 static void processState(TrgmNFA *trgmNFA, TrgmState *state);
 static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key);
@@ -806,10 +806,11 @@ getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
 		for (j = 0; j < charsCount; j++)
 		{
 			trgm_mb_char c;
+			int			clen = convertPgWchar(chars[j], &c);
 
-			if (!convertPgWchar(chars[j], &c))
+			if (!clen)
 				continue;		/* ok to ignore it altogether */
-			if (ISWORDCHR(c.bytes))
+			if (ISWORDCHR(c.bytes, clen))
 				colorInfo->wordChars[colorInfo->wordCharsCount++] = c;
 			else
 				colorInfo->containsNonWord = true;
@@ -821,13 +822,15 @@ getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
 
 /*
  * Convert pg_wchar to multibyte format.
- * Returns false if the character should be ignored completely.
+ * Returns 0 if the character should be ignored completely, else returns its
+ * byte length.
  */
-static bool
+static int
 convertPgWchar(pg_wchar c, trgm_mb_char *result)
 {
 	/* "s" has enough space for a multibyte character and a trailing NUL */
 	char		s[MAX_MULTIBYTE_CHAR_LEN + 1];
+	int			clen;
 
 	/*
 	 * We can ignore the NUL character, since it can never appear in a PG text
@@ -835,11 +838,11 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
 	 * reconstructing trigrams.
 	 */
 	if (c == 0)
-		return false;
+		return 0;
 
 	/* Do the conversion, making sure the result is NUL-terminated */
 	memset(s, 0, sizeof(s));
-	pg_wchar2mb_with_len(&c, s, 1);
+	clen = pg_wchar2mb_with_len(&c, s, 1);
 
 	/*
 	 * In IGNORECASE mode, we can ignore uppercase characters.  We assume that
@@ -861,7 +864,7 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
 		if (strcmp(lowerCased, s) != 0)
 		{
 			pfree(lowerCased);
-			return false;
+			return 0;
 		}
 		pfree(lowerCased);
 	}
@@ -869,7 +872,7 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
 
 	/* Fill result with exactly MAX_MULTIBYTE_CHAR_LEN bytes */
 	memcpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN);
-	return true;
+	return clen;
 }
 
 
diff --git a/contrib/pgcrypto/Makefile b/contrib/pgcrypto/Makefile
index 647952af597..8b6dabc9fe0 100644
--- a/contrib/pgcrypto/Makefile
+++ b/contrib/pgcrypto/Makefile
@@ -45,7 +45,8 @@ REGRESS = init md5 sha1 hmac-md5 hmac-sha1 blowfish rijndael sm4\
 	sha2 des 3des cast5 \
 	crypt-des crypt-md5 crypt-blowfish crypt-xdes \
 	pgp-armor pgp-decrypt pgp-encrypt $(CF_PGP_TESTS) \
-	pgp-pubkey-decrypt pgp-pubkey-encrypt pgp-info \
+	pgp-pubkey-decrypt pgp-pubkey-encrypt pgp-pubkey-session \
+	pgp-info \
 	setup_fips 
 
 EXTRA_CLEAN = gen-rtab
diff --git a/contrib/pgcrypto/expected/pgp-decrypt.out b/contrib/pgcrypto/expected/pgp-decrypt.out
index eb049ba9d44..1db89e8c00a 100644
--- a/contrib/pgcrypto/expected/pgp-decrypt.out
+++ b/contrib/pgcrypto/expected/pgp-decrypt.out
@@ -315,7 +315,7 @@ SaV9L04ky1qECNDx3XjnoKLC+H7IOQ==
  \xda39a3ee5e6b4b0d3255bfef95601890afd80709
 (1 row)
 
-select digest(pgp_sym_decrypt(dearmor('
+select digest(pgp_sym_decrypt_bytea(dearmor('
 -----BEGIN PGP MESSAGE-----
 Comment: dat3.aes.sha1.mdc.s2k3.z0
 
@@ -387,6 +387,27 @@ ERROR:  Wrong key or corrupt data
 select pgp_sym_decrypt(pgp_sym_encrypt_bytea('P', 'key'), 'key', 'debug=1');
 NOTICE:  dbg: parse_literal_data: data type=b
 ERROR:  Not text data
+-- NUL byte in text decrypt.  Ciphertext source:
+-- printf 'a\x00\xc' | gpg --homedir /nonexistent --textmode \
+--      --personal-cipher-preferences aes --no-emit-version --batch \
+--      --symmetric --passphrase key --armor
+do $$
+begin
+  perform pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+
+jA0EBwMCLd9OvySmZNZg0jgBe7vGTmnje5HGXI+zsIQ99WPZu4Zs/P6pQcZ+HZ4n
+SZQHOfE8tagjB6Rqow82QpSBiOfWn4qjhQ==
+=c2cz
+-----END PGP MESSAGE-----
+'), 'key', 'debug=1');
+exception when others then
+  raise '%',
+    regexp_replace(sqlerrm, 'encoding "[^"]*"', 'encoding [REDACTED]');
+end
+$$;
+ERROR:  invalid byte sequence for encoding [REDACTED]: 0x00
+CONTEXT:  PL/pgSQL function inline_code_block line 12 at RAISE
 -- Decryption with a certain incorrect key yields an apparent BZip2-compressed
 -- plaintext.  Ciphertext source: iterative pgp_sym_encrypt('secret', 'key')
 -- until the random prefix gave rise to that property.
diff --git a/contrib/pgcrypto/expected/pgp-decrypt_1.out b/contrib/pgcrypto/expected/pgp-decrypt_1.out
index 80a4c48613d..d214e0bc0e0 100644
--- a/contrib/pgcrypto/expected/pgp-decrypt_1.out
+++ b/contrib/pgcrypto/expected/pgp-decrypt_1.out
@@ -311,7 +311,7 @@ SaV9L04ky1qECNDx3XjnoKLC+H7IOQ==
  \xda39a3ee5e6b4b0d3255bfef95601890afd80709
 (1 row)
 
-select digest(pgp_sym_decrypt(dearmor('
+select digest(pgp_sym_decrypt_bytea(dearmor('
 -----BEGIN PGP MESSAGE-----
 Comment: dat3.aes.sha1.mdc.s2k3.z0
 
@@ -383,6 +383,27 @@ ERROR:  Wrong key or corrupt data
 select pgp_sym_decrypt(pgp_sym_encrypt_bytea('P', 'key'), 'key', 'debug=1');
 NOTICE:  dbg: parse_literal_data: data type=b
 ERROR:  Not text data
+-- NUL byte in text decrypt.  Ciphertext source:
+-- printf 'a\x00\xc' | gpg --homedir /nonexistent --textmode \
+--      --personal-cipher-preferences aes --no-emit-version --batch \
+--      --symmetric --passphrase key --armor
+do $$
+begin
+  perform pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+
+jA0EBwMCLd9OvySmZNZg0jgBe7vGTmnje5HGXI+zsIQ99WPZu4Zs/P6pQcZ+HZ4n
+SZQHOfE8tagjB6Rqow82QpSBiOfWn4qjhQ==
+=c2cz
+-----END PGP MESSAGE-----
+'), 'key', 'debug=1');
+exception when others then
+  raise '%',
+    regexp_replace(sqlerrm, 'encoding "[^"]*"', 'encoding [REDACTED]');
+end
+$$;
+ERROR:  invalid byte sequence for encoding [REDACTED]: 0x00
+CONTEXT:  PL/pgSQL function inline_code_block line 12 at RAISE
 -- Decryption with a certain incorrect key yields an apparent BZip2-compressed
 -- plaintext.  Ciphertext source: iterative pgp_sym_encrypt('secret', 'key')
 -- until the random prefix gave rise to that property.
diff --git a/contrib/pgcrypto/expected/pgp-pubkey-session.out b/contrib/pgcrypto/expected/pgp-pubkey-session.out
new file mode 100644
index 00000000000..f724d98eb24
--- /dev/null
+++ b/contrib/pgcrypto/expected/pgp-pubkey-session.out
@@ -0,0 +1,47 @@
+-- Test for overflow with session key at decrypt.
+-- Data automatically generated by scripts/pgp_session_data.py.
+-- See this file for details explaining how this data is generated.
+SELECT pgp_pub_decrypt_bytea(
+'\xc1c04c030000000000000000020800a46f5b9b1905b49457a6485474f71ed9b46c2527e1
+da08e1f7871e12c3d38828f2076b984a595bf60f616599ca5729d547de06a258bfbbcd30
+94a321e4668cd43010f0ca8ecf931e5d39bda1152c50c367b11c723f270729245d3ebdbd
+0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5060af7603cfd9ed186ebadd616
+3b50ae42bea5f6d14dda24e6d4687b434c175084515d562e896742b0ba9a1c87d5642e10
+a5550379c71cc490a052ada483b5d96526c0a600fc51755052aa77fdf72f7b4989b920e7
+b90f4b30787a46482670d5caecc7a515a926055ad5509d135702ce51a0e4c1033f2d939d
+8f0075ec3428e17310da37d3d2d7ad1ce99adcc91cd446c366c402ae1ee38250343a7fcc
+0f8bc28020e603d7a4795ef0dcc1c04c030000000000000000020800a46f5b9b1905b494
+57a6485474f71ed9b46c2527e1da08e1f7871e12c3d38828f2076b984a595bf60f616599
+ca5729d547de06a258bfbbcd3094a321e4668cd43010f0ca8ecf931e5d39bda1152c50c3
+67b11c723f270729245d3ebdbd0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5
+060af7603cfd9ed186ebadd6163b50ae42bea5f6d14dda24e6d4687b434c175084515d56
+2e896742b0ba9a1c87d5642e10a5550379c71cc490a052ada483b5d96526c0a600fc5175
+5052aa77fdf72f7b4989b920e7b90f4b30787a46482670d5caecc7a515a926055ad5509d
+135702ce51a0e4c1033f2d939d8f0075ec3428e17310da37d3d2d7ad1ce99adc'::bytea,
+'\xc7c2d8046965d657020800eef8bf1515adb1a3ee7825f75c668ea8dd3e3f9d13e958f6ad
+9c55adc0c931a4bb00abe1d52cf7bb0c95d537949d277a5292ede375c6b2a67a3bf7d19f
+f975bb7e7be35c2d8300dacba360a0163567372f7dc24000cc7cb6170bedc8f3b1f98c12
+07a6cb4de870a4bc61319b139dcc0e20c368fd68f8fd346d2c0b69c5aed560504e2ec6f1
+23086fe3c5540dc4dd155c0c67257c4ada862f90fe172ace344089da8135e92aca5c2709
+f1c1bc521798bb8c0365841496e709bd184132d387e0c9d5f26dc00fd06c3a76ef66a75c
+138285038684707a847b7bd33cfbefbf1d336be954a8048946af97a66352adef8e8b5ae4
+c4748c6f2510265b7a8267bc370dbb00110100010007ff7e72d4f95d2d39901ac12ca5c5
+18e767e719e72340c3fab51c8c5ab1c40f31db8eaffe43533fa61e2dbca2c3f4396c0847
+e5434756acbb1f68128f4136bb135710c89137d74538908dac77967de9e821c559700dd9
+de5a2727eec1f5d12d5d74869dd1de45ed369d94a8814d23861dd163f8c27744b26b98f0
+239c2e6dd1e3493b8cc976fdc8f9a5e250f715aa4c3d7d5f237f8ee15d242e8fa941d1a0
+ed9550ab632d992a97518d142802cb0a97b251319bf5742db8d9d8cbaa06cdfba2d75bc9
+9d77a51ff20bd5ba7f15d7af6e85b904de2855d19af08d45f39deb85403033c69c767a8e
+74a343b1d6c8911d34ea441ac3850e57808ed3d885835cbe6c79d10400ef16256f3d5c4c
+3341516a2d2aa888df81b603f48a27f3666b40f992a857c1d11ff639cd764a9b42d5a1f8
+58b4aeee36b85508bb5e8b91ef88a7737770b330224479d9b44eae8c631bc43628b69549
+507c0a1af0be0dd7696015abea722b571eb35eefc4ab95595378ec12814727443f625fcd
+183bb9b3bccf53b54dd0e5e7a50400ffe08537b2d4e6074e4a1727b658cfccdec8962302
+25e300c05690de45f7065c3d40d86f544a64d51a3e94424f9851a16d1322ebdb41fa8a45
+3131f3e2dc94e858e6396722643df382680f815e53bcdcde5da622f50530a83b217f1103
+cdd6e5e9babe1e415bbff28d44bd18c95f43bbd04afeb2a2a99af38a571c7540de21df03
+ff62c0a33d9143dd3f639893f47732c11c5a12c6052d1935f4d507b7ae1f76ab0e9a69b8
+7305a7f7c19bd509daf4903bff614bc26d118f03e461469c72c12d3a2bb4f78e4d342ce8
+487723649a01ed2b9eb11c662134502c098d55dfcd361939d8370873422c3da75a515a75
+9ffedfe7df44fb3c20f81650801a30d43b5c90b98b3eee'::bytea);
+ERROR:  Public key too big
diff --git a/contrib/pgcrypto/meson.build b/contrib/pgcrypto/meson.build
index df7dd50dbc3..57ebfd7ae6d 100644
--- a/contrib/pgcrypto/meson.build
+++ b/contrib/pgcrypto/meson.build
@@ -50,6 +50,7 @@ pgcrypto_regress = [
   'pgp-encrypt',
   'pgp-pubkey-decrypt',
   'pgp-pubkey-encrypt',
+  'pgp-pubkey-session',
   'pgp-info',
 ]
 
diff --git a/contrib/pgcrypto/pgp-pgsql.c b/contrib/pgcrypto/pgp-pgsql.c
index d9b15b07b0f..838a7c381fc 100644
--- a/contrib/pgcrypto/pgp-pgsql.c
+++ b/contrib/pgcrypto/pgp-pgsql.c
@@ -631,6 +631,7 @@ pgp_sym_decrypt_text(PG_FUNCTION_ARGS)
 		arg = PG_GETARG_BYTEA_PP(2);
 
 	res = decrypt_internal(0, 1, data, key, NULL, arg);
+	pg_verifymbstr(VARDATA_ANY(res), VARSIZE_ANY_EXHDR(res), false);
 
 	PG_FREE_IF_COPY(data, 0);
 	PG_FREE_IF_COPY(key, 1);
@@ -732,6 +733,7 @@ pgp_pub_decrypt_text(PG_FUNCTION_ARGS)
 		arg = PG_GETARG_BYTEA_PP(3);
 
 	res = decrypt_internal(1, 1, data, key, psw, arg);
+	pg_verifymbstr(VARDATA_ANY(res), VARSIZE_ANY_EXHDR(res), false);
 
 	PG_FREE_IF_COPY(data, 0);
 	PG_FREE_IF_COPY(key, 1);
diff --git a/contrib/pgcrypto/pgp-pubdec.c b/contrib/pgcrypto/pgp-pubdec.c
index a0a5738a40e..2a13aa3e6ad 100644
--- a/contrib/pgcrypto/pgp-pubdec.c
+++ b/contrib/pgcrypto/pgp-pubdec.c
@@ -157,6 +157,7 @@ pgp_parse_pubenc_sesskey(PGP_Context *ctx, PullFilter *pkt)
 	uint8	   *msg;
 	int			msglen;
 	PGP_MPI    *m;
+	unsigned	sess_key_len;
 
 	pk = ctx->pub_key;
 	if (pk == NULL)
@@ -220,11 +221,19 @@ pgp_parse_pubenc_sesskey(PGP_Context *ctx, PullFilter *pkt)
 	if (res < 0)
 		goto out;
 
+	sess_key_len = msglen - 3;
+	if (sess_key_len > PGP_MAX_KEY)
+	{
+		px_debug("incorrect session key length=%u", sess_key_len);
+		res = PXE_PGP_KEY_TOO_BIG;
+		goto out;
+	}
+
 	/*
 	 * got sesskey
 	 */
 	ctx->cipher_algo = *msg;
-	ctx->sess_key_len = msglen - 3;
+	ctx->sess_key_len = sess_key_len;
 	memcpy(ctx->sess_key, msg + 1, ctx->sess_key_len);
 
 out:
diff --git a/contrib/pgcrypto/px.c b/contrib/pgcrypto/px.c
index d35ccca7774..a7cb248f6b7 100644
--- a/contrib/pgcrypto/px.c
+++ b/contrib/pgcrypto/px.c
@@ -65,6 +65,7 @@ static const struct error_desc px_err_list[] = {
 	{PXE_PGP_UNEXPECTED_PKT, "Unexpected packet in key data"},
 	{PXE_PGP_MATH_FAILED, "Math operation failed"},
 	{PXE_PGP_SHORT_ELGAMAL_KEY, "Elgamal keys must be at least 1024 bits long"},
+	{PXE_PGP_KEY_TOO_BIG, "Public key too big"},
 	{PXE_PGP_UNKNOWN_PUBALGO, "Unknown public-key encryption algorithm"},
 	{PXE_PGP_WRONG_KEY, "Wrong key"},
 	{PXE_PGP_MULTIPLE_KEYS,
diff --git a/contrib/pgcrypto/px.h b/contrib/pgcrypto/px.h
index 222d8b1ad2f..bda0524a04a 100644
--- a/contrib/pgcrypto/px.h
+++ b/contrib/pgcrypto/px.h
@@ -75,7 +75,7 @@
 /* -108 is unused */
 #define PXE_PGP_MATH_FAILED			-109
 #define PXE_PGP_SHORT_ELGAMAL_KEY	-110
-/* -111 is unused */
+#define PXE_PGP_KEY_TOO_BIG			-111
 #define PXE_PGP_UNKNOWN_PUBALGO		-112
 #define PXE_PGP_WRONG_KEY			-113
 #define PXE_PGP_MULTIPLE_KEYS		-114
diff --git a/contrib/pgcrypto/scripts/pgp_session_data.py b/contrib/pgcrypto/scripts/pgp_session_data.py
new file mode 100644
index 00000000000..999350bb2bc
--- /dev/null
+++ b/contrib/pgcrypto/scripts/pgp_session_data.py
@@ -0,0 +1,491 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Generate PGP data to check the session key length of the input data provided
+# to pgp_pub_decrypt_bytea().
+#
+# First, the crafted data is generated from valid RSA data, freshly generated
+# by this script each time it is run, see generate_rsa_keypair().
+# Second, the crafted PGP data is built, see build_message_data() and
+# build_key_data().  Finally, the resulting SQL script is generated.
+#
+# This script generates in stdout the SQL file that is used in the regression
+# tests of pgcrypto.  The following command can be used to regenerate the file
+# which should never be manually manipulated:
+# python3 scripts/pgp_session_data.py > sql/pgp-pubkey-session.sql
+
+import os
+import re
+import struct
+import secrets
+import sys
+import time
+
+# pwn for binary manipulation (p32, p64)
+from pwn import *
+
+# Cryptographic libraries, to craft the PGP data.
+from Crypto.Cipher import AES
+from Crypto.PublicKey import RSA
+from Crypto.Util.number import inverse
+
+# AES key used for session key encryption (16 bytes for AES-128)
+AES_KEY = b'\x01' * 16
+
+def generate_rsa_keypair(key_size: int = 2048) -> dict:
+    """
+    Generate a fresh RSA key pair.
+
+    The generated key includes all components needed for PGP operations:
+    - n: public modulus (p * q)
+    - e: public exponent (typically 65537)
+    - d: private exponent (e^-1 mod phi(n))
+    - p, q: prime factors of n
+    - u: coefficient (p^-1 mod q) for CRT optimization
+
+    The caller can pass the wanted key size in input, for a default of 2048
+    bits.  This function returns the RSA key components, after performing
+    some validation on them.
+    """
+
+    start_time = time.time()
+
+    # Generate RSA key
+    key = RSA.generate(key_size)
+
+    # Extract all key components
+    rsa_components = {
+        'n': key.n,      # Public modulus (p * q)
+        'e': key.e,      # Public exponent (typically 65537)
+        'd': key.d,      # Private exponent (e^-1 mod phi(n))
+        'p': key.p,      # First prime factor
+        'q': key.q,      # Second prime factor
+        'u': inverse(key.p, key.q)  # Coefficient for CRT: p^-1 mod q
+    }
+
+    # Validate key components for correctness
+    validate_rsa_key(rsa_components)
+
+    return rsa_components
+
+def validate_rsa_key(rsa: dict) -> None:
+    """
+    Validate a generated RSA key.
+
+    This function performs basic validation to ensure the RSA key is properly
+    constructed and all components are consistent, at least mathematically.
+
+    Validations performed:
+    1. n = p * q (modulus is product of primes)
+    2. gcd(e, phi(n)) = 1 (public exponent is coprime to phi(n))
+    3. (d * e) mod(phi(n)) = 1 (private exponent is multiplicative inverse)
+    4. (u * p) (mod q) = 1 (coefficient is correct for CRT)
+    """
+
+    n, e, d, p, q, u = rsa['n'], rsa['e'], rsa['d'], rsa['p'], rsa['q'], rsa['u']
+
+    # Check that n = p * q
+    if n != p * q:
+        raise ValueError("RSA validation failed: n <> p * q")
+
+    # Check that p and q are different
+    if p == q:
+        raise ValueError("RSA validation failed: p = q (not allowed)")
+
+    # Calculate phi(n) = (p-1)(q-1)
+    phi_n = (p - 1) * (q - 1)
+
+    # Check that gcd(e, phi(n)) = 1
+    def gcd(a, b):
+        while b:
+            a, b = b, a % b
+        return a
+
+    if gcd(e, phi_n) != 1:
+        raise ValueError("RSA validation failed: gcd(e, phi(n)) <> 1")
+
+    # Check that (d * e) mod(phi(n)) = 1
+    if (d * e) % phi_n != 1:
+        raise ValueError("RSA validation failed: d * e <> 1 (mod phi(n))")
+
+    # Check that (u * p) (mod q) = 1
+    if (u * p) % q != 1:
+        raise ValueError("RSA validation failed: u * p <> 1 (mod q)")
+
+def mpi_encode(x: int) -> bytes:
+    """
+    Encode an integer as an OpenPGP Multi-Precision Integer (MPI).
+
+    Format (RFC 4880, Section 3.2):
+    - 2 bytes: bit length of the integer (big-endian)
+    - N bytes: the integer in big-endian format
+
+    This is used to encode RSA key components (n, e, d, p, q, u) in PGP
+    packets.
+
+    The integer to encode is given in input, returning an MPI-encoded
+    integer.
+
+    For example:
+        mpi_encode(65537) -> b'\x00\x11\x01\x00\x01'
+        (17 bits, value 0x010001)
+    """
+    if x < 0:
+        raise ValueError("MPI cannot encode negative integers")
+
+    if x == 0:
+        # Special case: zero has 0 bits and empty magnitude
+        bits = 0
+        mag = b""
+    else:
+        # Calculate bit length and convert to bytes
+        bits = x.bit_length()
+        mag = x.to_bytes((bits + 7) // 8, 'big')
+
+    # Pack: 2-byte bit length + magnitude bytes
+    return struct.pack('>H', bits) + mag
+
+def new_packet(tag: int, payload: bytes) -> bytes:
+    """
+    Create a new OpenPGP packet with a proper header.
+
+    OpenPGP packet format (RFC 4880, Section 4.2):
+    - New packet format: 0xC0 | tag
+    - Length encoding depends on payload size:
+      * 0-191: single byte
+      * 192-8383: two bytes (192 + ((length - 192) >> 8), (length - 192) & 0xFF)
+      * 8384+: five bytes (0xFF + 4-byte big-endian length)
+
+    The packet is built from a "tag" (1-63) and some "payload" data.  The
+    result generated is a complete OpenPGP packet.
+
+    For example:
+        new_packet(1, b'data') -> b'\xC1\x04data'
+        (Tag 1, length 4, payload 'data')
+    """
+    # New packet format: bits 7 and 6 are set, the tag goes in bits 0-5
+    first = 0xC0 | (tag & 0x3F)
+    ln = len(payload)
+
+    # Encode length according to OpenPGP specification
+    if ln <= 191:
+        # Single byte length for small packets
+        llen = bytes([ln])
+    elif ln <= 8383:
+        # Two-byte length for medium packets
+        ln2 = ln - 192
+        llen = bytes([192 + (ln2 >> 8), ln2 & 0xFF])
+    else:
+        # Five-byte length for large packets
+        llen = bytes([255]) + struct.pack('>I', ln)
+
+    return bytes([first]) + llen + payload
+
+def build_key_data(rsa: dict) -> bytes:
+    """
+    Build the key data, containing an RSA private key.
+
+    The RSA contents should have been generated previously.
+
+    Format (see RFC 4880, Section 5.5.3):
+    - 1 byte: version (4)
+    - 4 bytes: creation time (current Unix timestamp)
+    - 1 byte: public key algorithm (2 = RSA encrypt)
+    - MPI: RSA public modulus n
+    - MPI: RSA public exponent e
+    - 1 byte: string-to-key usage (0 = no encryption)
+    - MPI: RSA private exponent d
+    - MPI: RSA prime p
+    - MPI: RSA prime q
+    - MPI: RSA coefficient u = p^-1 mod q
+    - 2 bytes: checksum of private key material
+
+    This function takes a set of RSA key components in input (n, e, d, p, q, u)
+    and returns a secret key packet.
+    """
+
+    # Public key portion
+    ver = bytes([4])                           # Version 4 key
+    ctime = struct.pack('>I', int(time.time())) # Current Unix timestamp
+    algo = bytes([2])                          # RSA encrypt algorithm
+    n_mpi = mpi_encode(rsa['n'])               # Public modulus
+    e_mpi = mpi_encode(rsa['e'])               # Public exponent
+    pub = ver + ctime + algo + n_mpi + e_mpi
+
+    # Private key portion
+    hide_type = bytes([0])              # No string-to-key encryption
+    d_mpi = mpi_encode(rsa['d'])        # Private exponent
+    p_mpi = mpi_encode(rsa['p'])        # Prime p
+    q_mpi = mpi_encode(rsa['q'])        # Prime q
+    u_mpi = mpi_encode(rsa['u'])        # Coefficient u = p^-1 mod q
+
+    # Calculate checksum of private key material (simple sum mod 65536)
+    private_data = d_mpi + p_mpi + q_mpi + u_mpi
+    cksum = sum(private_data) & 0xFFFF
+
+    secret = hide_type + private_data + struct.pack('>H', cksum)
+    payload = pub + secret
+
+    return new_packet(7, payload)
+
+def pgp_cfb_encrypt_resync(key, plaintext):
+    """
+    Implement OpenPGP CFB mode with resync.
+
+    OpenPGP CFB mode is a variant of standard CFB with a resync operation
+    after the first two blocks.
+
+    Algorithm (RFC 4880, Section 13.9):
+    1. Block 1: FR=zeros, encrypt full block_size bytes
+    2. Block 2: FR=block1, encrypt only 2 bytes
+    3. Resync: FR = block1[2:] + block2
+    4. Remaining blocks: standard CFB mode
+
+    This function uses the following arguments:
+    - key: AES encryption key (16 bytes for AES-128)
+    - plaintext: Data to encrypt
+    """
+    block_size = 16  # AES block size
+    cipher = AES.new(key[:16], AES.MODE_ECB)  # Use ECB for manual CFB
+    ciphertext = b''
+
+    # Block 1: FR=zeros, encrypt full 16 bytes
+    FR = b'\x00' * block_size
+    FRE = cipher.encrypt(FR)  # Encrypt the feedback register
+    block1 = bytes(a ^ b for a, b in zip(FRE, plaintext[0:16]))
+    ciphertext += block1
+
+    # Block 2: FR=block1, encrypt only 2 bytes
+    FR = block1
+    FRE = cipher.encrypt(FR)
+    block2 = bytes(a ^ b for a, b in zip(FRE[0:2], plaintext[16:18]))
+    ciphertext += block2
+
+    # Resync: FR = block1[2:16] + block2[0:2]
+    # This is the key difference from standard CFB mode
+    FR = block1[2:] + block2
+
+    # Block 3+: Continue with standard CFB mode
+    pos = 18
+    while pos < len(plaintext):
+        FRE = cipher.encrypt(FR)
+        chunk_len = min(block_size, len(plaintext) - pos)
+        chunk = plaintext[pos:pos+chunk_len]
+        enc_chunk = bytes(a ^ b for a, b in zip(FRE[:chunk_len], chunk))
+        ciphertext += enc_chunk
+
+        # Update feedback register for next iteration
+        if chunk_len == block_size:
+            FR = enc_chunk
+        else:
+            # Partial block: pad with old FR bytes
+            FR = enc_chunk + FR[chunk_len:]
+        pos += chunk_len
+
+    return ciphertext
+
+def build_literal_data_packet(data: bytes) -> bytes:
+    """
+    Build a literal data packet containing a message.
+
+    Format (RFC 4880, Section 5.9):
+    - 1 byte: data format ('b' = binary, 't' = text, 'u' = UTF-8 text)
+    - 1 byte: filename length (0 = no filename)
+    - N bytes: filename (empty in this case)
+    - 4 bytes: date (current Unix timestamp)
+    - M bytes: literal data
+
+    The data used to build the packet is given in input, with the generated
+    result returned.
+    """
+    body = bytes([
+        ord('b'),                              # Binary data format
+        0,                                     # Filename length (0 = no filename)
+    ]) + struct.pack('>I', int(time.time())) + data  # Current timestamp + data
+
+    return new_packet(11, body)
+
+def build_symenc_data_packet(sess_key: bytes, cipher_algo: int, payload: bytes) -> bytes:
+    """
+    Build a symmetrically-encrypted data packet using AES-128-CFB.
+
+    This packet contains encrypted data using the session key. The format
+    includes a random prefix, for security (see RFC 4880, Section 5.7).
+
+    Packet structure:
+    - Random prefix (block_size bytes)
+    - Prefix repeat (last 2 bytes of prefix repeated)
+    - Encrypted literal data packet
+
+    This function uses the following set of arguments:
+    - sess_key: Session key for encryption
+    - cipher_algo: Cipher algorithm identifier (7 = AES-128)
+    - payload: Data to encrypt (wrapped in literal data packet)
+    """
+    block_size = 16  # AES-128 block size
+    key = sess_key[:16]  # Use first 16 bytes for AES-128
+
+    # Create random prefix + repeat last 2 bytes (total 18 bytes)
+    # This is required by OpenPGP for integrity checking
+    prefix_random = secrets.token_bytes(block_size)
+    prefix = prefix_random + prefix_random[-2:]  # 18 bytes total
+
+    # Wrap payload in literal data packet
+    literal_pkt = build_literal_data_packet(payload)
+
+    # Plaintext = prefix + literal data packet
+    plaintext = prefix + literal_pkt
+
+    # Encrypt using OpenPGP CFB mode with resync
+    ciphertext = pgp_cfb_encrypt_resync(key, plaintext)
+
+    return new_packet(9, ciphertext)
+
+def build_tag1_packet(rsa: dict, sess_key: bytes) -> bytes:
+    """
+    Build a public-key encrypted key.
+
+    This is a very important function, as it is able to create the packet
+    triggering the overflow check.  This function can also be used to create
+    "legit" packet data.
+
+    Format (RFC 4880, Section 5.1):
+    - 1 byte: version (3)
+    - 8 bytes: key ID (0 = any key accepted)
+    - 1 byte: public key algorithm (2 = RSA encrypt)
+    - MPI: RSA-encrypted session key
+
+    This uses in arguments the generated RSA key pair, and the session key
+    to encrypt.  The latter is manipulated to trigger the overflow.
+
+    This function returns a complete packet holding the encrypted session key.
+    """
+
+    # Calculate RSA modulus size in bytes
+    n_bytes = (rsa['n'].bit_length() + 7) // 8
+
+    # Session key message format:
+    # - 1 byte: symmetric cipher algorithm (7 = AES-128)
+    # - N bytes: session key
+    # - 2 bytes: checksum (simple sum of session key bytes)
+    algo_byte = bytes([7])  # AES-128 algorithm identifier
+    cksum = sum(sess_key) & 0xFFFF  # 16-bit checksum
+    M = algo_byte + sess_key + struct.pack('>H', cksum)
+
+    # PKCS#1 v1.5 padding construction
+    # Format: 0x02 || PS || 0x00 || M
+    # Total padded message must be exactly n_bytes long.
+    total_len = n_bytes  # Total length must equal modulus size in bytes
+    ps_len = total_len - len(M) - 2  # Subtract 2 for 0x02 and 0x00 bytes
+
+    if ps_len < 8:
+        raise ValueError(f"Padding string too short ({ps_len} bytes); need at least 8 bytes. "
+                        f"Message length: {len(M)}, Modulus size: {n_bytes} bytes")
+
+    # Create padding string with *ALL* bytes being 0xFF (no zero separator!)
+    PS = bytes([0xFF]) * ps_len
+
+    # Construct the complete padded message
+    # Normal PKCS#1 v1.5 padding: 0x02 || PS || 0x00 || M
+    padded = bytes([0x02]) + PS + bytes([0x00]) + M
+
+    # Verify padding construction
+    if len(padded) != n_bytes:
+        raise ValueError(f"Padded message length ({len(padded)}) doesn't match RSA modulus size ({n_bytes})")
+
+    # Convert padded message to integer and encrypt with RSA
+    m_int = int.from_bytes(padded, 'big')
+
+    # Ensure message is smaller than modulus (required for RSA)
+    if m_int >= rsa['n']:
+        raise ValueError("Padded message is larger than RSA modulus")
+
+    # RSA encryption: c = m^e mod n
+    c_int = pow(m_int, rsa['e'], rsa['n'])
+
+    # Encode encrypted result as MPI
+    c_mpi = mpi_encode(c_int)
+
+    # Build complete packet
+    ver = bytes([3])           # Version 3 packet
+    key_id = b"\x00" * 8      # Key ID (0 = any key accepted)
+    algo = bytes([2])         # RSA encrypt algorithm
+    payload = ver + key_id + algo + c_mpi
+
+    return new_packet(1, payload)
+
+def build_message_data(rsa: dict) -> bytes:
+    """
+    This function creates a crafted message, with a long session key
+    length.
+
+    This takes in input the RSA key components generated previously,
+    returning a concatenated set of PGP packets crafted for the purpose
+    of this test.
+    """
+
+    # Base prefix for session key (AES key + padding + size).
+    # Note that the crafted size is the important part for this test.
+    prefix = AES_KEY + b"\x00" * 16 + p32(0x10)
+
+    # Build encrypted data packet, legit.
+    sedata = build_symenc_data_packet(AES_KEY, cipher_algo=7, payload=b"\x0a\x00")
+
+    # Build multiple packets
+    packets = [
+        # First packet, legit.
+        build_tag1_packet(rsa, prefix),
+
+        # Encrypted data packet, legit.
+        sedata,
+
+        # Second packet: information payload.
+        #
+        # This packet contains a longer-crafted session key, able to trigger
+        # the overflow check in pgcrypto.  This is the critical part, and
+        # you are right to pay a lot of attention here if you are
+        # reading this code.
+        build_tag1_packet(rsa, prefix)
+    ]
+
+    return b"".join(packets)
+
+def main():
+    # Default key size.
+    # This number can be set to a higher number if wanted, like 4096.  We
+    # just do not need to do that here.
+    key_size = 2048
+
+    # Generate fresh RSA key pair
+    rsa = generate_rsa_keypair(key_size)
+
+    # Generate the message data.
+    print("### Building message data", file=sys.stderr)
+    message_data = build_message_data(rsa)
+
+    # Build the key containing the RSA private key
+    print("### Building key data", file=sys.stderr)
+    key_data = build_key_data(rsa)
+
+    # Convert to hexadecimal, for the bytea used in the SQL file.
+    message_data = message_data.hex()
+    key_data = key_data.hex()
+
+    # Split each value into lines of 72 characters, for readability.
+    message_data = re.sub("(.{72})", "\\1\n", message_data, 0, re.DOTALL)
+    key_data = re.sub("(.{72})", "\\1\n", key_data, 0, re.DOTALL)
+
+    # Get the script filename for documentation
+    file_basename = os.path.basename(__file__)
+
+    # Output the SQL test case
+    print(f'''-- Test for overflow with session key at decrypt.
+-- Data automatically generated by scripts/{file_basename}.
+-- See this file for details explaining how this data is generated.
+SELECT pgp_pub_decrypt_bytea(
+'\\x{message_data}'::bytea,
+'\\x{key_data}'::bytea);''',
+          file=sys.stdout)
+
+if __name__ == "__main__":
+    main()
diff --git a/contrib/pgcrypto/sql/pgp-decrypt.sql b/contrib/pgcrypto/sql/pgp-decrypt.sql
index 49a0267bbcb..2fe498f2f02 100644
--- a/contrib/pgcrypto/sql/pgp-decrypt.sql
+++ b/contrib/pgcrypto/sql/pgp-decrypt.sql
@@ -228,7 +228,7 @@ SaV9L04ky1qECNDx3XjnoKLC+H7IOQ==
 -----END PGP MESSAGE-----
 '), '0123456789abcdefghij'), 'sha1');
 
-select digest(pgp_sym_decrypt(dearmor('
+select digest(pgp_sym_decrypt_bytea(dearmor('
 -----BEGIN PGP MESSAGE-----
 Comment: dat3.aes.sha1.mdc.s2k3.z0
 
@@ -282,6 +282,26 @@ VsxxqLSPzNLAeIspJk5G
 -- Routine text/binary mismatch.
 select pgp_sym_decrypt(pgp_sym_encrypt_bytea('P', 'key'), 'key', 'debug=1');
 
+-- NUL byte in text decrypt.  Ciphertext source:
+-- printf 'a\x00\xc' | gpg --homedir /nonexistent --textmode \
+--      --personal-cipher-preferences aes --no-emit-version --batch \
+--      --symmetric --passphrase key --armor
+do $$
+begin
+  perform pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+
+jA0EBwMCLd9OvySmZNZg0jgBe7vGTmnje5HGXI+zsIQ99WPZu4Zs/P6pQcZ+HZ4n
+SZQHOfE8tagjB6Rqow82QpSBiOfWn4qjhQ==
+=c2cz
+-----END PGP MESSAGE-----
+'), 'key', 'debug=1');
+exception when others then
+  raise '%',
+    regexp_replace(sqlerrm, 'encoding "[^"]*"', 'encoding [REDACTED]');
+end
+$$;
+
 -- Decryption with a certain incorrect key yields an apparent BZip2-compressed
 -- plaintext.  Ciphertext source: iterative pgp_sym_encrypt('secret', 'key')
 -- until the random prefix gave rise to that property.
diff --git a/contrib/pgcrypto/sql/pgp-pubkey-session.sql b/contrib/pgcrypto/sql/pgp-pubkey-session.sql
new file mode 100644
index 00000000000..51792f1f4d8
--- /dev/null
+++ b/contrib/pgcrypto/sql/pgp-pubkey-session.sql
@@ -0,0 +1,46 @@
+-- Test for overflow with session key at decrypt.
+-- Data automatically generated by scripts/pgp_session_data.py.
+-- See this file for details explaining how this data is generated.
+SELECT pgp_pub_decrypt_bytea(
+'\xc1c04c030000000000000000020800a46f5b9b1905b49457a6485474f71ed9b46c2527e1
+da08e1f7871e12c3d38828f2076b984a595bf60f616599ca5729d547de06a258bfbbcd30
+94a321e4668cd43010f0ca8ecf931e5d39bda1152c50c367b11c723f270729245d3ebdbd
+0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5060af7603cfd9ed186ebadd616
+3b50ae42bea5f6d14dda24e6d4687b434c175084515d562e896742b0ba9a1c87d5642e10
+a5550379c71cc490a052ada483b5d96526c0a600fc51755052aa77fdf72f7b4989b920e7
+b90f4b30787a46482670d5caecc7a515a926055ad5509d135702ce51a0e4c1033f2d939d
+8f0075ec3428e17310da37d3d2d7ad1ce99adcc91cd446c366c402ae1ee38250343a7fcc
+0f8bc28020e603d7a4795ef0dcc1c04c030000000000000000020800a46f5b9b1905b494
+57a6485474f71ed9b46c2527e1da08e1f7871e12c3d38828f2076b984a595bf60f616599
+ca5729d547de06a258bfbbcd3094a321e4668cd43010f0ca8ecf931e5d39bda1152c50c3
+67b11c723f270729245d3ebdbd0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5
+060af7603cfd9ed186ebadd6163b50ae42bea5f6d14dda24e6d4687b434c175084515d56
+2e896742b0ba9a1c87d5642e10a5550379c71cc490a052ada483b5d96526c0a600fc5175
+5052aa77fdf72f7b4989b920e7b90f4b30787a46482670d5caecc7a515a926055ad5509d
+135702ce51a0e4c1033f2d939d8f0075ec3428e17310da37d3d2d7ad1ce99adc'::bytea,
+'\xc7c2d8046965d657020800eef8bf1515adb1a3ee7825f75c668ea8dd3e3f9d13e958f6ad
+9c55adc0c931a4bb00abe1d52cf7bb0c95d537949d277a5292ede375c6b2a67a3bf7d19f
+f975bb7e7be35c2d8300dacba360a0163567372f7dc24000cc7cb6170bedc8f3b1f98c12
+07a6cb4de870a4bc61319b139dcc0e20c368fd68f8fd346d2c0b69c5aed560504e2ec6f1
+23086fe3c5540dc4dd155c0c67257c4ada862f90fe172ace344089da8135e92aca5c2709
+f1c1bc521798bb8c0365841496e709bd184132d387e0c9d5f26dc00fd06c3a76ef66a75c
+138285038684707a847b7bd33cfbefbf1d336be954a8048946af97a66352adef8e8b5ae4
+c4748c6f2510265b7a8267bc370dbb00110100010007ff7e72d4f95d2d39901ac12ca5c5
+18e767e719e72340c3fab51c8c5ab1c40f31db8eaffe43533fa61e2dbca2c3f4396c0847
+e5434756acbb1f68128f4136bb135710c89137d74538908dac77967de9e821c559700dd9
+de5a2727eec1f5d12d5d74869dd1de45ed369d94a8814d23861dd163f8c27744b26b98f0
+239c2e6dd1e3493b8cc976fdc8f9a5e250f715aa4c3d7d5f237f8ee15d242e8fa941d1a0
+ed9550ab632d992a97518d142802cb0a97b251319bf5742db8d9d8cbaa06cdfba2d75bc9
+9d77a51ff20bd5ba7f15d7af6e85b904de2855d19af08d45f39deb85403033c69c767a8e
+74a343b1d6c8911d34ea441ac3850e57808ed3d885835cbe6c79d10400ef16256f3d5c4c
+3341516a2d2aa888df81b603f48a27f3666b40f992a857c1d11ff639cd764a9b42d5a1f8
+58b4aeee36b85508bb5e8b91ef88a7737770b330224479d9b44eae8c631bc43628b69549
+507c0a1af0be0dd7696015abea722b571eb35eefc4ab95595378ec12814727443f625fcd
+183bb9b3bccf53b54dd0e5e7a50400ffe08537b2d4e6074e4a1727b658cfccdec8962302
+25e300c05690de45f7065c3d40d86f544a64d51a3e94424f9851a16d1322ebdb41fa8a45
+3131f3e2dc94e858e6396722643df382680f815e53bcdcde5da622f50530a83b217f1103
+cdd6e5e9babe1e415bbff28d44bd18c95f43bbd04afeb2a2a99af38a571c7540de21df03
+ff62c0a33d9143dd3f639893f47732c11c5a12c6052d1935f4d507b7ae1f76ab0e9a69b8
+7305a7f7c19bd509daf4903bff614bc26d118f03e461469c72c12d3a2bb4f78e4d342ce8
+487723649a01ed2b9eb11c662134502c098d55dfcd361939d8370873422c3da75a515a75
+9ffedfe7df44fb3c20f81650801a30d43b5c90b98b3eee'::bytea);
diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c
index 64c879e5470..554843b3548 100644
--- a/contrib/unaccent/unaccent.c
+++ b/contrib/unaccent/unaccent.c
@@ -149,9 +149,9 @@ initTrie(const char *filename)
 				state = 0;
 				for (ptr = line; *ptr; ptr += ptrlen)
 				{
-					ptrlen = pg_mblen(ptr);
+					ptrlen = pg_mblen_cstr(ptr);
 					/* ignore whitespace, but end src or trg */
-					if (t_isspace(ptr))
+					if (t_isspace_cstr(ptr))
 					{
 						if (state == 1)
 							state = 2;
@@ -315,6 +315,7 @@ unaccent_lexize(PG_FUNCTION_ARGS)
 	char	   *srcchar = (char *) PG_GETARG_POINTER(1);
 	int32		len = PG_GETARG_INT32(2);
 	char	   *srcstart = srcchar;
+	const char *srcend = srcstart + len;
 	TSLexeme   *res;
 	StringInfoData buf;
 
@@ -342,7 +343,7 @@ unaccent_lexize(PG_FUNCTION_ARGS)
 		}
 		else
 		{
-			matchlen = pg_mblen(srcchar);
+			matchlen = pg_mblen_range(srcchar, srcend);
 			if (buf.data != NULL)
 				appendBinaryStringInfo(&buf, srcchar, matchlen);
 		}
diff --git a/pom.xml b/pom.xml
index 43c583614a6..0b461661554 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1802,6 +1802,7 @@ code or new licensing patterns.
             <exclude>contrib/btree_gist/btree_bool.c</exclude>
             <exclude>contrib/basic_archive/basic_archive.conf</exclude>
             <exclude>contrib/pg_freespacemap/pg_freespacemap.conf</exclude>
+            <exclude>contrib/pgcrypto/scripts/pgp_session_data.py</exclude>
             <exclude>contrib/pg_walinspect/walinspect.conf</exclude>
             <exclude>contrib/pgrowlocks/specs/pgrowlocks.spec</exclude>
             <exclude>contrib/tcn/specs/tcn.spec</exclude>
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index ac21884162f..933aa42ffc9 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -235,6 +235,7 @@ hashoidvector(PG_FUNCTION_ARGS)
 {
 	oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
 
+	check_valid_oidvector(key);
 	return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
 }
 
@@ -243,6 +244,7 @@ hashoidvectorextended(PG_FUNCTION_ARGS)
 {
 	oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
 
+	check_valid_oidvector(key);
 	return hash_any_extended((unsigned char *) key->values,
 							 key->dim1 * sizeof(Oid),
 							 PG_GETARG_INT64(1));
diff --git a/src/backend/access/nbtree/nbtcompare.c b/src/backend/access/nbtree/nbtcompare.c
index 976a2cc6447..720733b75d2 100644
--- a/src/backend/access/nbtree/nbtcompare.c
+++ b/src/backend/access/nbtree/nbtcompare.c
@@ -299,6 +299,9 @@ btoidvectorcmp(PG_FUNCTION_ARGS)
 	oidvector  *b = (oidvector *) PG_GETARG_POINTER(1);
 	int			i;
 
+	check_valid_oidvector(a);
+	check_valid_oidvector(b);
+
 	/* We arbitrarily choose to sort first by vector length */
 	if (a->dim1 != b->dim1)
 		PG_RETURN_INT32(a->dim1 - b->dim1);
diff --git a/src/backend/catalog/pg_depend.c b/src/backend/catalog/pg_depend.c
index 02e0ce71a07..b3d1c2fba99 100644
--- a/src/backend/catalog/pg_depend.c
+++ b/src/backend/catalog/pg_depend.c
@@ -23,11 +23,13 @@
 #include "catalog/pg_constraint.h"
 #include "catalog/pg_depend.h"
 #include "catalog/pg_extension.h"
+#include "catalog/pg_type.h"
 #include "commands/extension.h"
 #include "miscadmin.h"
 #include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
 #include "utils/rel.h"
+#include "utils/syscache.h"
 
 
 static bool isObjectPinned(const ObjectAddress *object);
@@ -812,6 +814,77 @@ getAutoExtensionsOfObject(Oid classId, Oid objectId)
 	return result;
 }
 
+/*
+ * Look up a type belonging to an extension.
+ *
+ * Returns the type's OID, or InvalidOid if not found.
+ *
+ * Notice that the type is specified by name only, without a schema.
+ * That's because this will typically be used by relocatable extensions
+ * which can't make a-priori assumptions about which schema their objects
+ * are in.  As long as the extension only defines one type of this name,
+ * the answer is unique anyway.
+ *
+ * We might later add the ability to look up functions, operators, etc.
+ */
+Oid
+getExtensionType(Oid extensionOid, const char *typname)
+{
+	Oid			result = InvalidOid;
+	Relation	depRel;
+	ScanKeyData key[3];
+	SysScanDesc scan;
+	HeapTuple	tup;
+
+	depRel = table_open(DependRelationId, AccessShareLock);
+
+	ScanKeyInit(&key[0],
+				Anum_pg_depend_refclassid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(ExtensionRelationId));
+	ScanKeyInit(&key[1],
+				Anum_pg_depend_refobjid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(extensionOid));
+	ScanKeyInit(&key[2],
+				Anum_pg_depend_refobjsubid,
+				BTEqualStrategyNumber, F_INT4EQ,
+				Int32GetDatum(0));
+
+	scan = systable_beginscan(depRel, DependReferenceIndexId, true,
+							  NULL, 3, key);
+
+	while (HeapTupleIsValid(tup = systable_getnext(scan)))
+	{
+		Form_pg_depend depform = (Form_pg_depend) GETSTRUCT(tup);
+
+		if (depform->classid == TypeRelationId &&
+			depform->deptype == DEPENDENCY_EXTENSION)
+		{
+			Oid			typoid = depform->objid;
+			HeapTuple	typtup;
+
+			typtup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typoid));
+			if (!HeapTupleIsValid(typtup))
+				continue;		/* should we throw an error? */
+			if (strcmp(NameStr(((Form_pg_type) GETSTRUCT(typtup))->typname),
+					   typname) == 0)
+			{
+				result = typoid;
+				ReleaseSysCache(typtup);
+				break;			/* no need to keep searching */
+			}
+			ReleaseSysCache(typtup);
+		}
+	}
+
+	systable_endscan(scan);
+
+	table_close(depRel, AccessShareLock);
+
+	return result;
+}
+
 /*
  * Detect whether a sequence is marked as "owned" by a column
  *
diff --git a/src/backend/catalog/pg_proc.c b/src/backend/catalog/pg_proc.c
index 6285ce06f66..f46e854dd81 100644
--- a/src/backend/catalog/pg_proc.c
+++ b/src/backend/catalog/pg_proc.c
@@ -1227,7 +1227,7 @@ match_prosrc_to_literal(const char *prosrc, const char *literal,
 			if (cursorpos > 0)
 				newcp++;
 		}
-		chlen = pg_mblen(prosrc);
+		chlen = pg_mblen_cstr(prosrc);
 		if (strncmp(prosrc, literal, chlen) != 0)
 			goto fail;
 		prosrc += chlen;
diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c
index 41e06471f4e..8ec1ffdf209 100644
--- a/src/backend/commands/extension.c
+++ b/src/backend/commands/extension.c
@@ -46,6 +46,7 @@
 #include "catalog/pg_depend.h"
 #include "catalog/pg_extension.h"
 #include "catalog/pg_namespace.h"
+#include "catalog/pg_proc.h"
 #include "catalog/pg_type.h"
 #include "cdb/cdbgang.h"
 #include "commands/alter.h"
@@ -63,10 +64,12 @@
 #include "utils/builtins.h"
 #include "utils/conffiles.h"
 #include "utils/fmgroids.h"
+#include "utils/inval.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
 #include "utils/snapmgr.h"
+#include "utils/syscache.h"
 #include "utils/varlena.h"
 
 #include "catalog/oid_dispatch.h"
@@ -114,7 +117,26 @@ typedef struct ExtensionVersionInfo
 	struct ExtensionVersionInfo *previous;	/* current best predecessor */
 } ExtensionVersionInfo;
 
+/*
+ * Cache structure for get_function_sibling_type (and maybe later,
+ * allied lookup functions).
+ */
+typedef struct ExtensionSiblingCache
+{
+	struct ExtensionSiblingCache *next; /* list link */
+	/* lookup key: requesting function's OID and type name */
+	Oid			reqfuncoid;
+	const char *typname;
+	bool		valid;			/* is entry currently valid? */
+	uint32		exthash;		/* cache hash of owning extension's OID */
+	Oid			typeoid;		/* OID associated with typname */
+} ExtensionSiblingCache;
+
+/* Head of linked list of ExtensionSiblingCache structs */
+static ExtensionSiblingCache *ext_sibling_list = NULL;
+
 /* Local functions */
+static void ext_sibling_callback(Datum arg, int cacheid, uint32 hashvalue);
 static List *find_update_path(List *evi_list,
 							  ExtensionVersionInfo *evi_start,
 							  ExtensionVersionInfo *evi_target,
@@ -264,6 +286,114 @@ get_extension_schema(Oid ext_oid)
 	return result;
 }
 
+/*
+ * get_function_sibling_type - find a type belonging to same extension as func
+ *
+ * Returns the type's OID, or InvalidOid if not found.
+ *
+ * This is useful in extensions, which won't have fixed object OIDs.
+ * We work from the calling function's own OID, which it can get from its
+ * FunctionCallInfo parameter, and look up the owning extension and thence
+ * a type belonging to the same extension.
+ *
+ * Notice that the type is specified by name only, without a schema.
+ * That's because this will typically be used by relocatable extensions
+ * which can't make a-priori assumptions about which schema their objects
+ * are in.  As long as the extension only defines one type of this name,
+ * the answer is unique anyway.
+ *
+ * We might later add the ability to look up functions, operators, etc.
+ *
+ * This code is simply a frontend for some pg_depend lookups.  Those lookups
+ * are fairly expensive, so we provide a simple cache facility.  We assume
+ * that the passed typname is actually a C constant, or at least permanently
+ * allocated, so that we need not copy that string.
+ */
+Oid
+get_function_sibling_type(Oid funcoid, const char *typname)
+{
+	ExtensionSiblingCache *cache_entry;
+	Oid			extoid;			/* extension that owns funcoid */
+	Oid			typeoid;		/* type named typname within extoid */
+
+	/*
+	 * See if we have the answer cached.  Someday there may be enough callers
+	 * to justify a hash table, but for now, a simple linked list is fine.
+	 */
+	for (cache_entry = ext_sibling_list; cache_entry != NULL;
+		 cache_entry = cache_entry->next)
+	{
+		if (funcoid == cache_entry->reqfuncoid &&
+			strcmp(typname, cache_entry->typname) == 0)
+			break;				/* key matches; entry may be invalidated */
+	}
+	if (cache_entry && cache_entry->valid)
+		return cache_entry->typeoid;	/* cache hit */
+
+	/*
+	 * Nope, so do the expensive lookups.  We do not expect failures, so we do
+	 * not cache negative results.
+	 */
+	extoid = getExtensionOfObject(ProcedureRelationId, funcoid);
+	if (!OidIsValid(extoid))
+		return InvalidOid;		/* no owning extension; nothing cached */
+	typeoid = getExtensionType(extoid, typname);
+	if (!OidIsValid(typeoid))
+		return InvalidOid;		/* type lookup failed; nothing cached */
+
+	/*
+	 * Build a new cache entry, or revalidate the invalidated one found above.
+	 */
+	if (cache_entry == NULL)
+	{
+		/* Register invalidation hook if this is first entry */
+		if (ext_sibling_list == NULL)
+			CacheRegisterSyscacheCallback(EXTENSIONOID,
+										  ext_sibling_callback,
+										  (Datum) 0);
+
+		/* Momentarily zero the space to ensure valid flag is false */
+		cache_entry = (ExtensionSiblingCache *)
+			MemoryContextAllocZero(CacheMemoryContext,
+								   sizeof(ExtensionSiblingCache));
+		cache_entry->next = ext_sibling_list;	/* push onto list head */
+		ext_sibling_list = cache_entry;
+	}
+
+	cache_entry->reqfuncoid = funcoid;
+	cache_entry->typname = typname; /* pointer is stored, not copied */
+	cache_entry->exthash = GetSysCacheHashValue1(EXTENSIONOID,
+												 ObjectIdGetDatum(extoid));
+	cache_entry->typeoid = typeoid;
+	/* Mark it valid only once it's fully populated */
+	cache_entry->valid = true;
+
+	return typeoid;
+}
+
+/*
+ * ext_sibling_callback
+ *		Syscache inval callback function for EXTENSIONOID cache
+ *
+ * It seems sufficient to invalidate ExtensionSiblingCache entries when
+ * the owning extension's pg_extension entry is modified or deleted.
+ * Neither a requesting function's OID, nor the OID of the object it's
+ * looking for, could change without an extension update or drop/recreate.
+ */
+static void
+ext_sibling_callback(Datum arg, int cacheid, uint32 hashvalue)
+{
+	ExtensionSiblingCache *cache_entry; /* arg and cacheid are not used */
+
+	for (cache_entry = ext_sibling_list; cache_entry != NULL;
+		 cache_entry = cache_entry->next)
+	{
+		if (hashvalue == 0 ||	/* zero hash invalidates every entry */
+			cache_entry->exthash == hashvalue)
+			cache_entry->valid = false; /* entry stays linked for reuse */
+	}
+}
+
 /*
  * Utility functions to check validity of extension and version names
  */
diff --git a/src/backend/commands/operatorcmds.c b/src/backend/commands/operatorcmds.c
index 51530eb2f56..47e4a02edb9 100644
--- a/src/backend/commands/operatorcmds.c
+++ b/src/backend/commands/operatorcmds.c
@@ -309,7 +309,6 @@ ValidateRestrictionEstimator(List *restrictionName)
 {
 	Oid			typeId[4];
 	Oid			restrictionOid;
-	AclResult	aclresult;
 
 	typeId[0] = INTERNALOID;	/* PlannerInfo */
 	typeId[1] = OIDOID;			/* operator OID */
@@ -325,11 +324,33 @@ ValidateRestrictionEstimator(List *restrictionName)
 				 errmsg("restriction estimator function %s must return type %s",
 						NameListToString(restrictionName), "float8")));
 
-	/* Require EXECUTE rights for the estimator */
-	aclresult = object_aclcheck(ProcedureRelationId, restrictionOid, GetUserId(), ACL_EXECUTE);
-	if (aclresult != ACLCHECK_OK)
-		aclcheck_error(aclresult, OBJECT_FUNCTION,
-					   NameListToString(restrictionName));
+	/*
+	 * If the estimator is not a built-in function, require superuser
+	 * privilege to install it.  This protects against using something that is
+	 * not a restriction estimator or has hard-wired assumptions about what
+	 * data types it is working with.  (Built-in estimators are required to
+	 * defend themselves adequately against unexpected data type choices, but
+	 * it seems impractical to expect that of extensions' estimators.)
+	 *
+	 * If it is built-in, only require EXECUTE rights.
+	 */
+	if (restrictionOid >= FirstGenbkiObjectId)
+	{
+		if (!superuser())
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must be superuser to specify a non-built-in restriction estimator function")));
+	}
+	else
+	{
+		AclResult	aclresult;
+
+		aclresult = object_aclcheck(ProcedureRelationId, restrictionOid,
+									GetUserId(), ACL_EXECUTE);
+		if (aclresult != ACLCHECK_OK)
+			aclcheck_error(aclresult, OBJECT_FUNCTION,
+						   NameListToString(restrictionName));
+	}
 
 	return restrictionOid;
 }
@@ -345,7 +366,6 @@ ValidateJoinEstimator(List *joinName)
 	Oid			typeId[5];
 	Oid			joinOid;
 	Oid			joinOid2;
-	AclResult	aclresult;
 
 	typeId[0] = INTERNALOID;	/* PlannerInfo */
 	typeId[1] = OIDOID;			/* operator OID */
@@ -383,11 +403,24 @@ ValidateJoinEstimator(List *joinName)
 				 errmsg("join estimator function %s must return type %s",
 						NameListToString(joinName), "float8")));
 
-	/* Require EXECUTE rights for the estimator */
-	aclresult = object_aclcheck(ProcedureRelationId, joinOid, GetUserId(), ACL_EXECUTE);
-	if (aclresult != ACLCHECK_OK)
-		aclcheck_error(aclresult, OBJECT_FUNCTION,
-					   NameListToString(joinName));
+	/* privilege checks are the same as in ValidateRestrictionEstimator */
+	if (joinOid >= FirstGenbkiObjectId)
+	{
+		if (!superuser())
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must be superuser to specify a non-built-in join estimator function")));
+	}
+	else
+	{
+		AclResult	aclresult;
+
+		aclresult = object_aclcheck(ProcedureRelationId, joinOid,
+									GetUserId(), ACL_EXECUTE);
+		if (aclresult != ACLCHECK_OK)
+			aclcheck_error(aclresult, OBJECT_FUNCTION,
+						   NameListToString(joinName));
+	}
 
 	return joinOid;
 }
diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c
index c7cf7c04b60..fe7ca30ce97 100644
--- a/src/backend/tsearch/dict_synonym.c
+++ b/src/backend/tsearch/dict_synonym.c
@@ -47,8 +47,8 @@ findwrd(char *in, char **end, uint16 *flags)
 	char	   *lastchar;
 
 	/* Skip leading spaces */
-	while (*in && t_isspace(in))
-		in += pg_mblen(in);
+	while (*in && t_isspace_cstr(in))
+		in += pg_mblen_cstr(in);
 
 	/* Return NULL on empty lines */
 	if (*in == '\0')
@@ -60,10 +60,10 @@ findwrd(char *in, char **end, uint16 *flags)
 	lastchar = start = in;
 
 	/* Find end of word */
-	while (*in && !t_isspace(in))
+	while (*in && !t_isspace_cstr(in))
 	{
 		lastchar = in;
-		in += pg_mblen(in);
+		in += pg_mblen_cstr(in);
 	}
 
 	if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags)
diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c
index 80402e99375..dcee060ba19 100644
--- a/src/backend/tsearch/dict_thesaurus.c
+++ b/src/backend/tsearch/dict_thesaurus.c
@@ -190,8 +190,8 @@ thesaurusRead(const char *filename, DictThesaurus *d)
 		ptr = line;
 
 		/* is it a comment? */
-		while (*ptr && t_isspace(ptr))
-			ptr += pg_mblen(ptr);
+		while (*ptr && t_isspace_cstr(ptr))
+			ptr += pg_mblen_cstr(ptr);
 
 		if (t_iseq(ptr, '#') || *ptr == '\0' ||
 			t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
@@ -212,7 +212,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
 								 errmsg("unexpected delimiter")));
 					state = TR_WAITSUBS;
 				}
-				else if (!t_isspace(ptr))
+				else if (!t_isspace_cstr(ptr))
 				{
 					beginwrd = ptr;
 					state = TR_INLEX;
@@ -225,7 +225,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
 					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
 					state = TR_WAITSUBS;
 				}
-				else if (t_isspace(ptr))
+				else if (t_isspace_cstr(ptr))
 				{
 					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
 					state = TR_WAITLEX;
@@ -237,15 +237,15 @@ thesaurusRead(const char *filename, DictThesaurus *d)
 				{
 					useasis = true;
 					state = TR_INSUBS;
-					beginwrd = ptr + pg_mblen(ptr);
+					beginwrd = ptr + pg_mblen_cstr(ptr);
 				}
 				else if (t_iseq(ptr, '\\'))
 				{
 					useasis = false;
 					state = TR_INSUBS;
-					beginwrd = ptr + pg_mblen(ptr);
+					beginwrd = ptr + pg_mblen_cstr(ptr);
 				}
-				else if (!t_isspace(ptr))
+				else if (!t_isspace_cstr(ptr))
 				{
 					useasis = false;
 					beginwrd = ptr;
@@ -254,7 +254,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
 			}
 			else if (state == TR_INSUBS)
 			{
-				if (t_isspace(ptr))
+				if (t_isspace_cstr(ptr))
 				{
 					if (ptr == beginwrd)
 						ereport(ERROR,
@@ -267,7 +267,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
 			else
 				elog(ERROR, "unrecognized thesaurus state: %d", state);
 
-			ptr += pg_mblen(ptr);
+			ptr += pg_mblen_cstr(ptr);
 		}
 
 		if (state == TR_INSUBS)
diff --git a/src/backend/tsearch/regis.c b/src/backend/tsearch/regis.c
index 0c74c6d0c1c..ee5bc378350 100644
--- a/src/backend/tsearch/regis.c
+++ b/src/backend/tsearch/regis.c
@@ -37,7 +37,7 @@ RS_isRegis(const char *str)
 	{
 		if (state == RS_IN_WAIT)
 		{
-			if (t_isalpha(c))
+			if (t_isalpha_cstr(c))
 				 /* okay */ ;
 			else if (t_iseq(c, '['))
 				state = RS_IN_ONEOF;
@@ -48,14 +48,14 @@ RS_isRegis(const char *str)
 		{
 			if (t_iseq(c, '^'))
 				state = RS_IN_NONEOF;
-			else if (t_isalpha(c))
+			else if (t_isalpha_cstr(c))
 				state = RS_IN_ONEOF_IN;
 			else
 				return false;
 		}
 		else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
 		{
-			if (t_isalpha(c))
+			if (t_isalpha_cstr(c))
 				 /* okay */ ;
 			else if (t_iseq(c, ']'))
 				state = RS_IN_WAIT;
@@ -64,7 +64,7 @@ RS_isRegis(const char *str)
 		}
 		else
 			elog(ERROR, "internal error in RS_isRegis: state %d", state);
-		c += pg_mblen(c);
+		c += pg_mblen_cstr(c);
 	}
 
 	return (state == RS_IN_WAIT);
@@ -96,15 +96,14 @@ RS_compile(Regis *r, bool issuffix, const char *str)
 	{
 		if (state == RS_IN_WAIT)
 		{
-			if (t_isalpha(c))
+			if (t_isalpha_cstr(c))
 			{
 				if (ptr)
 					ptr = newRegisNode(ptr, len);
 				else
 					ptr = r->node = newRegisNode(NULL, len);
-				COPYCHAR(ptr->data, c);
 				ptr->type = RSF_ONEOF;
-				ptr->len = pg_mblen(c);
+				ptr->len = ts_copychar_cstr(ptr->data, c);
 			}
 			else if (t_iseq(c, '['))
 			{
@@ -125,10 +124,9 @@ RS_compile(Regis *r, bool issuffix, const char *str)
 				ptr->type = RSF_NONEOF;
 				state = RS_IN_NONEOF;
 			}
-			else if (t_isalpha(c))
+			else if (t_isalpha_cstr(c))
 			{
-				COPYCHAR(ptr->data, c);
-				ptr->len = pg_mblen(c);
+				ptr->len = ts_copychar_cstr(ptr->data, c);
 				state = RS_IN_ONEOF_IN;
 			}
 			else				/* shouldn't get here */
@@ -136,11 +134,8 @@ RS_compile(Regis *r, bool issuffix, const char *str)
 		}
 		else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
 		{
-			if (t_isalpha(c))
-			{
-				COPYCHAR(ptr->data + ptr->len, c);
-				ptr->len += pg_mblen(c);
-			}
+			if (t_isalpha_cstr(c))
+				ptr->len += ts_copychar_cstr(ptr->data + ptr->len, c);
 			else if (t_iseq(c, ']'))
 				state = RS_IN_WAIT;
 			else				/* shouldn't get here */
@@ -148,7 +143,7 @@ RS_compile(Regis *r, bool issuffix, const char *str)
 		}
 		else
 			elog(ERROR, "internal error in RS_compile: state %d", state);
-		c += pg_mblen(c);
+		c += pg_mblen_cstr(c);
 	}
 
 	if (state != RS_IN_WAIT)	/* shouldn't get here */
@@ -187,10 +182,10 @@ mb_strchr(char *str, char *c)
 	char	   *ptr = str;
 	bool		res = false;
 
-	clen = pg_mblen(c);
+	clen = pg_mblen_cstr(c);
 	while (*ptr && !res)
 	{
-		plen = pg_mblen(ptr);
+		plen = pg_mblen_cstr(ptr);
 		if (plen == clen)
 		{
 			i = plen;
@@ -219,7 +214,7 @@ RS_execute(Regis *r, char *str)
 	while (*c)
 	{
 		len++;
-		c += pg_mblen(c);
+		c += pg_mblen_cstr(c);
 	}
 
 	if (len < r->nchar)
@@ -230,7 +225,7 @@ RS_execute(Regis *r, char *str)
 	{
 		len -= r->nchar;
 		while (len-- > 0)
-			c += pg_mblen(c);
+			c += pg_mblen_cstr(c);
 	}
 
 
@@ -250,7 +245,7 @@ RS_execute(Regis *r, char *str)
 				elog(ERROR, "unrecognized regis node type: %d", ptr->type);
 		}
 		ptr = ptr->next;
-		c += pg_mblen(c);
+		c += pg_mblen_cstr(c);
 	}
 
 	return true;
diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c
index 8d1d81501f2..8f4fdf2782c 100644
--- a/src/backend/tsearch/spell.c
+++ b/src/backend/tsearch/spell.c
@@ -232,7 +232,7 @@ findchar(char *str, int c)
 	{
 		if (t_iseq(str, c))
 			return str;
-		str += pg_mblen(str);
+		str += pg_mblen_cstr(str);
 	}
 
 	return NULL;
@@ -245,7 +245,7 @@ findchar2(char *str, int c1, int c2)
 	{
 		if (t_iseq(str, c1) || t_iseq(str, c2))
 			return str;
-		str += pg_mblen(str);
+		str += pg_mblen_cstr(str);
 	}
 
 	return NULL;
@@ -352,6 +352,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
 	char	   *next,
 			   *sbuf = *sflagset;
 	int			maxstep;
+	int			clen;
 	bool		stop = false;
 	bool		met_comma = false;
 
@@ -363,11 +364,11 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
 		{
 			case FM_LONG:
 			case FM_CHAR:
-				COPYCHAR(sflag, *sflagset);
-				sflag += pg_mblen(*sflagset);
+				clen = ts_copychar_cstr(sflag, *sflagset);
+				sflag += clen;
 
 				/* Go to start of the next flag */
-				*sflagset += pg_mblen(*sflagset);
+				*sflagset += clen;
 
 				/* Check if we get all characters of flag */
 				maxstep--;
@@ -391,7 +392,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
 				*sflagset = next;
 				while (**sflagset)
 				{
-					if (t_isdigit(*sflagset))
+					if (t_isdigit_cstr(*sflagset))
 					{
 						if (!met_comma)
 							ereport(ERROR,
@@ -409,7 +410,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
 											*sflagset)));
 						met_comma = true;
 					}
-					else if (!t_isspace(*sflagset))
+					else if (!t_isspace_cstr(*sflagset))
 					{
 						ereport(ERROR,
 								(errcode(ERRCODE_CONFIG_FILE_ERROR),
@@ -417,7 +418,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
 										*sflagset)));
 					}
 
-					*sflagset += pg_mblen(*sflagset);
+					*sflagset += pg_mblen_cstr(*sflagset);
 				}
 				stop = true;
 				break;
@@ -543,7 +544,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
 			while (*s)
 			{
 				/* we allow only single encoded flags for faster works */
-				if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s))
+				if (pg_mblen_cstr(s) == 1 && t_isprint_unbounded(s) && !t_isspace_unbounded(s))
 					s++;
 				else
 				{
@@ -559,12 +560,12 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
 		s = line;
 		while (*s)
 		{
-			if (t_isspace(s))
+			if (t_isspace_cstr(s))
 			{
 				*s = '\0';
 				break;
 			}
-			s += pg_mblen(s);
+			s += pg_mblen_cstr(s);
 		}
 		pstr = lowerstr_ctx(Conf, line);
 
@@ -796,17 +797,17 @@ get_nextfield(char **str, char *next)
 
 	while (**str)
 	{
+		int			clen = pg_mblen_cstr(*str);
+
 		if (state == PAE_WAIT_MASK)
 		{
 			if (t_iseq(*str, '#'))
 				return false;
-			else if (!t_isspace(*str))
+			else if (!t_isspace_cstr(*str))
 			{
-				int			clen = pg_mblen(*str);
-
 				if (clen < avail)
 				{
-					COPYCHAR(next, *str);
+					ts_copychar_with_len(next, *str, clen);
 					next += clen;
 					avail -= clen;
 				}
@@ -815,24 +816,22 @@ get_nextfield(char **str, char *next)
 		}
 		else					/* state == PAE_INMASK */
 		{
-			if (t_isspace(*str))
+			if (t_isspace_cstr(*str))
 			{
 				*next = '\0';
 				return true;
 			}
 			else
 			{
-				int			clen = pg_mblen(*str);
-
 				if (clen < avail)
 				{
-					COPYCHAR(next, *str);
+					ts_copychar_with_len(next, *str, clen);
 					next += clen;
 					avail -= clen;
 				}
 			}
 		}
-		*str += pg_mblen(*str);
+		*str += clen;
 	}
 
 	*next = '\0';
@@ -922,14 +921,15 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
 
 	while (*str)
 	{
+		int			clen = pg_mblen_cstr(str);
+
 		if (state == PAE_WAIT_MASK)
 		{
 			if (t_iseq(str, '#'))
 				return false;
-			else if (!t_isspace(str))
+			else if (!t_isspace_cstr(str))
 			{
-				COPYCHAR(pmask, str);
-				pmask += pg_mblen(str);
+				pmask += ts_copychar_with_len(pmask, str, clen);
 				state = PAE_INMASK;
 			}
 		}
@@ -940,10 +940,9 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
 				*pmask = '\0';
 				state = PAE_WAIT_FIND;
 			}
-			else if (!t_isspace(str))
+			else if (!t_isspace_cstr(str))
 			{
-				COPYCHAR(pmask, str);
-				pmask += pg_mblen(str);
+				pmask += ts_copychar_with_len(pmask, str, clen);
 			}
 		}
 		else if (state == PAE_WAIT_FIND)
@@ -952,13 +951,12 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
 			{
 				state = PAE_INFIND;
 			}
-			else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
+			else if (t_isalpha_cstr(str) || t_iseq(str, '\'') /* english 's */ )
 			{
-				COPYCHAR(prepl, str);
-				prepl += pg_mblen(str);
+				prepl += ts_copychar_with_len(prepl, str, clen);
 				state = PAE_INREPL;
 			}
-			else if (!t_isspace(str))
+			else if (!t_isspace_cstr(str))
 				ereport(ERROR,
 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
 						 errmsg("syntax error")));
@@ -970,12 +968,11 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
 				*pfind = '\0';
 				state = PAE_WAIT_REPL;
 			}
-			else if (t_isalpha(str))
+			else if (t_isalpha_cstr(str))
 			{
-				COPYCHAR(pfind, str);
-				pfind += pg_mblen(str);
+				pfind += ts_copychar_with_len(pfind, str, clen);
 			}
-			else if (!t_isspace(str))
+			else if (!t_isspace_cstr(str))
 				ereport(ERROR,
 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
 						 errmsg("syntax error")));
@@ -986,13 +983,12 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
 			{
 				break;			/* void repl */
 			}
-			else if (t_isalpha(str))
+			else if (t_isalpha_cstr(str))
 			{
-				COPYCHAR(prepl, str);
-				prepl += pg_mblen(str);
+				prepl += ts_copychar_with_len(prepl, str, clen);
 				state = PAE_INREPL;
 			}
-			else if (!t_isspace(str))
+			else if (!t_isspace_cstr(str))
 				ereport(ERROR,
 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
 						 errmsg("syntax error")));
@@ -1004,12 +1000,11 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
 				*prepl = '\0';
 				break;
 			}
-			else if (t_isalpha(str))
+			else if (t_isalpha_cstr(str))
 			{
-				COPYCHAR(prepl, str);
-				prepl += pg_mblen(str);
+				prepl += ts_copychar_with_len(prepl, str, clen);
 			}
-			else if (!t_isspace(str))
+			else if (!t_isspace_cstr(str))
 				ereport(ERROR,
 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
 						 errmsg("syntax error")));
@@ -1017,7 +1012,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
 		else
 			elog(ERROR, "unrecognized state in parse_affentry: %d", state);
 
-		str += pg_mblen(str);
+		str += clen;
 	}
 
 	*pmask = *pfind = *prepl = '\0';
@@ -1070,10 +1065,9 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
 	CompoundAffixFlag *newValue;
 	char		sbuf[BUFSIZ];
 	char	   *sflag;
-	int			clen;
 
-	while (*s && t_isspace(s))
-		s += pg_mblen(s);
+	while (*s && t_isspace_cstr(s))
+		s += pg_mblen_cstr(s);
 
 	if (!*s)
 		ereport(ERROR,
@@ -1082,10 +1076,10 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
 
 	/* Get flag without \n */
 	sflag = sbuf;
-	while (*s && !t_isspace(s) && *s != '\n')
+	while (*s && !t_isspace_cstr(s) && *s != '\n')
 	{
-		clen = pg_mblen(s);
-		COPYCHAR(sflag, s);
+		int			clen = ts_copychar_cstr(sflag, s);
+
 		sflag += clen;
 		s += clen;
 	}
@@ -1228,7 +1222,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
 
 	while ((recoded = tsearch_readline(&trst)) != NULL)
 	{
-		if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
+		if (*recoded == '\0' || t_isspace_cstr(recoded) || t_iseq(recoded, '#'))
 		{
 			pfree(recoded);
 			continue;
@@ -1265,8 +1259,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
 		{
 			char	   *s = recoded + strlen("FLAG");
 
-			while (*s && t_isspace(s))
-				s += pg_mblen(s);
+			while (*s && t_isspace_cstr(s))
+				s += pg_mblen_cstr(s);
 
 			if (*s)
 			{
@@ -1301,7 +1295,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
 	{
 		int			fields_read;
 
-		if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
+		if (*recoded == '\0' || t_isspace_cstr(recoded) || t_iseq(recoded, '#'))
 			goto nextline;
 
 		fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
@@ -1464,12 +1458,12 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
 			s = findchar2(recoded, 'l', 'L');
 			if (s)
 			{
-				while (*s && !t_isspace(s))
-					s += pg_mblen(s);
-				while (*s && t_isspace(s))
-					s += pg_mblen(s);
+				while (*s && !t_isspace_cstr(s))
+					s += pg_mblen_cstr(s);
+				while (*s && t_isspace_cstr(s))
+					s += pg_mblen_cstr(s);
 
-				if (*s && pg_mblen(s) == 1)
+				if (*s && pg_mblen_cstr(s) == 1)
 				{
 					addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
 					Conf->usecompound = true;
@@ -1497,8 +1491,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
 			s = recoded + 4;	/* we need non-lowercased string */
 			flagflags = 0;
 
-			while (*s && t_isspace(s))
-				s += pg_mblen(s);
+			while (*s && t_isspace_cstr(s))
+				s += pg_mblen_cstr(s);
 
 			if (*s == '*')
 			{
@@ -1519,14 +1513,13 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
 			 * be followed by EOL, whitespace, or ':'.  Otherwise this is a
 			 * new-format flag command.
 			 */
-			if (*s && pg_mblen(s) == 1)
+			if (*s && pg_mblen_cstr(s) == 1)
 			{
-				COPYCHAR(flag, s);
+				flag[0] = *s++;
 				flag[1] = '\0';
 
-				s++;
 				if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
-					t_isspace(s))
+					t_isspace_cstr(s))
 				{
 					oldformat = true;
 					goto nextline;
@@ -1750,7 +1743,7 @@ NISortDictionary(IspellDict *Conf)
 							(errcode(ERRCODE_CONFIG_FILE_ERROR),
 							 errmsg("invalid affix alias \"%s\"",
 									Conf->Spell[i]->p.flag)));
-				if (*end != '\0' && !t_isdigit(end) && !t_isspace(end))
+				if (*end != '\0' && !t_isdigit_cstr(end) && !t_isspace_cstr(end))
 					ereport(ERROR,
 							(errcode(ERRCODE_CONFIG_FILE_ERROR),
 							 errmsg("invalid affix alias \"%s\"",
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index 537ad288f53..4a01b65d577 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -33,92 +33,44 @@ static void tsearch_readline_callback(void *arg);
  */
 #define WC_BUF_LEN  3
 
-/*
- * The reason these functions use a 3-wchar_t output buffer, not 2 as you
- * might expect, is that on Windows "wchar_t" is 16 bits and what we'll be
- * getting from char2wchar() is UTF16 not UTF32.  A single input character
- * may therefore produce a surrogate pair rather than just one wchar_t;
- * we also need room for a trailing null.  When we do get a surrogate pair,
- * we pass just the first code to iswdigit() etc, so that these functions will
- * always return false for characters outside the Basic Multilingual Plane.
- */
-#define WC_BUF_LEN  3
-
-int
-t_isdigit(const char *ptr)
-{
-	int			clen = pg_mblen(ptr);
-	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
-
-	if (clen == 1 || database_ctype_is_c)
-		return isdigit(TOUCHAR(ptr));
-
-	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
-
-	return iswdigit((wint_t) character[0]);
-}
-
-int
-t_isspace(const char *ptr)
-{
-	int			clen = pg_mblen(ptr);
-	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
-
-	if (clen == 1 || database_ctype_is_c)
-		return isspace(TOUCHAR(ptr));
-
-	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
-
-	return iswspace((wint_t) character[0]);
-}
-
-int
-t_isalpha(const char *ptr)
-{
-	int			clen = pg_mblen(ptr);
-	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
-
-	if (clen == 1 || database_ctype_is_c)
-		return isalpha(TOUCHAR(ptr));
-
-	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
-
-	return iswalpha((wint_t) character[0]);
-}
-
-int
-t_isalnum(const char *ptr)
-{
-	int			clen = pg_mblen(ptr);
-	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
-
-	if (clen == 1 || database_ctype_is_c)
-		return isalnum(TOUCHAR(ptr));
-
-	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
-
-	return iswalnum((wint_t) character[0]);
-}
-
-int
-t_isprint(const char *ptr)
-{
-	int			clen = pg_mblen(ptr);
-	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
-
-	if (clen == 1 || database_ctype_is_c)
-		return isprint(TOUCHAR(ptr));
-
-	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
-
-	return iswprint((wint_t) character[0]);
+#define GENERATE_T_ISCLASS_DEF(character_class) \
+/* mblen shall be that of the first character */ \
+int \
+t_is##character_class##_with_len(const char *ptr, int mblen) \
+{ \
+	int			clen = pg_mblen_with_len(ptr, mblen); \
+	wchar_t		character[WC_BUF_LEN]; \
+	pg_locale_t mylocale = 0;	/* TODO */ \
+	if (clen == 1 || database_ctype_is_c) /* use the <ctype.h> test */ \
+		return is##character_class(TOUCHAR(ptr)); \
+	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); \
+	return isw##character_class((wint_t) character[0]); /* first wchar_t only */ \
+} \
+\
+/* ptr shall point to a NUL-terminated string */ \
+int \
+t_is##character_class##_cstr(const char *ptr) \
+{ \
+	return t_is##character_class##_with_len(ptr, pg_mblen_cstr(ptr)); \
+} \
+/* ptr shall point to a string with pre-validated encoding */ \
+int \
+t_is##character_class##_unbounded(const char *ptr) \
+{ \
+	return t_is##character_class##_with_len(ptr, pg_mblen_unbounded(ptr)); \
+} \
+/* historical name for _unbounded */ \
+int \
+t_is##character_class(const char *ptr) \
+{ \
+	return t_is##character_class##_unbounded(ptr); \
 }
 
+GENERATE_T_ISCLASS_DEF(alnum)	/* t_isalnum{,_with_len,_cstr,_unbounded} */
+GENERATE_T_ISCLASS_DEF(alpha)	/* t_isalpha{,_with_len,_cstr,_unbounded} */
+GENERATE_T_ISCLASS_DEF(digit)	/* t_isdigit{,_with_len,_cstr,_unbounded} */
+GENERATE_T_ISCLASS_DEF(print)	/* t_isprint{,_with_len,_cstr,_unbounded} */
+GENERATE_T_ISCLASS_DEF(space)	/* t_isspace{,_with_len,_cstr,_unbounded} */
 
 /*
  * Set up to read a file using tsearch_readline().  This facility is
diff --git a/src/backend/tsearch/ts_selfuncs.c b/src/backend/tsearch/ts_selfuncs.c
index 92afc67a5c4..511ba6be7ff 100644
--- a/src/backend/tsearch/ts_selfuncs.c
+++ b/src/backend/tsearch/ts_selfuncs.c
@@ -109,12 +109,14 @@ tsmatchsel(PG_FUNCTION_ARGS)
 	 * OK, there's a Var and a Const we're dealing with here.  We need the
 	 * Const to be a TSQuery, else we can't do anything useful.  We have to
 	 * check this because the Var might be the TSQuery not the TSVector.
+	 *
+	 * Also check that the Var really is a TSVector, in case this estimator is
+	 * mistakenly attached to some other operator.
 	 */
-	if (((Const *) other)->consttype == TSQUERYOID)
+	if (((Const *) other)->consttype == TSQUERYOID &&
+		vardata.vartype == TSVECTOROID)
 	{
 		/* tsvector @@ tsquery or the other way around */
-		Assert(vardata.vartype == TSVECTOROID);
-
 		selec = tsquerysel(&vardata, ((Const *) other)->constvalue);
 	}
 	else
diff --git a/src/backend/tsearch/ts_utils.c b/src/backend/tsearch/ts_utils.c
index 7c4c2a91123..463e5253558 100644
--- a/src/backend/tsearch/ts_utils.c
+++ b/src/backend/tsearch/ts_utils.c
@@ -88,8 +88,8 @@ readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
 			char	   *pbuf = line;
 
 			/* Trim trailing space */
-			while (*pbuf && !t_isspace(pbuf))
-				pbuf += pg_mblen(pbuf);
+			while (*pbuf && !t_isspace_cstr(pbuf))
+				pbuf += pg_mblen_cstr(pbuf);
 			*pbuf = '\0';
 
 			/* Skip empty lines */
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 9130b148366..05d605ade51 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -1727,7 +1727,8 @@ TParserGet(TParser *prs)
 			prs->state->charlen = 0;
 		else
 			prs->state->charlen = (prs->charmaxlen == 1) ? prs->charmaxlen :
-				pg_mblen(prs->str + prs->state->posbyte);
+				pg_mblen_range(prs->str + prs->state->posbyte,
+							   prs->str + prs->lenstr);
 
 		Assert(prs->state->posbyte + prs->state->charlen <= prs->lenstr);
 		Assert(prs->state->state >= TPS_Base && prs->state->state < TPS_Null);
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index ff3e1af0a1d..ec28adc92c6 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -3803,6 +3803,12 @@ deconstruct_array_builtin(ArrayType *array,
 			elmalign = TYPALIGN_SHORT;
 			break;
 
+		case INT4OID:
+			elmlen = sizeof(int32);
+			elmbyval = true;
+			elmalign = TYPALIGN_INT;
+			break;
+
 		case OIDOID:
 			elmlen = sizeof(Oid);
 			elmbyval = true;
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index e5ac3ad23df..a20fbf18c24 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -215,7 +215,7 @@ hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
 			ereturn(escontext, 0,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 					 errmsg("invalid hexadecimal digit: \"%.*s\"",
-							pg_mblen(s), s)));
+							pg_mblen_range(s, srcend), s)));
 		s++;
 		if (s >= srcend)
 			ereturn(escontext, 0,
@@ -225,7 +225,7 @@ hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
 			ereturn(escontext, 0,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 					 errmsg("invalid hexadecimal digit: \"%.*s\"",
-							pg_mblen(s), s)));
+							pg_mblen_range(s, srcend), s)));
 		s++;
 		*p++ = (v1 << 4) | v2;
 	}
@@ -354,7 +354,7 @@ pg_base64_decode(const char *src, size_t len, char *dst)
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 						 errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence",
-								pg_mblen(s - 1), s - 1)));
+								pg_mblen_range(s - 1, srcend), s - 1)));
 		}
 		/* add it to buffer */
 		buf = (buf << 6) + b;
diff --git a/src/backend/utils/adt/format_type.c b/src/backend/utils/adt/format_type.c
index 12402a06379..a9054d11b0c 100644
--- a/src/backend/utils/adt/format_type.c
+++ b/src/backend/utils/adt/format_type.c
@@ -444,11 +444,15 @@ oidvectortypes(PG_FUNCTION_ARGS)
 {
 	oidvector  *oidArray = (oidvector *) PG_GETARG_POINTER(0);
 	char	   *result;
-	int			numargs = oidArray->dim1;
+	int			numargs;
 	int			num;
 	size_t		total;
 	size_t		left;
 
+	/* validate input before fetching dim1 */
+	check_valid_oidvector(oidArray);
+	numargs = oidArray->dim1;
+
 	total = 20 * numargs + 1;
 	result = palloc(total);
 	result[0] = '\0';
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 738cdf81a7f..35e64caaaa6 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1385,7 +1385,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
 					ereport(ERROR,
 							(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
 							 errmsg("invalid datetime format separator: \"%s\"",
-									pnstrdup(str, pg_mblen(str)))));
+									pnstrdup(str, pg_mblen_cstr(str)))));
 
 				if (*str == ' ')
 					n->type = NODE_TYPE_SPACE;
@@ -1415,7 +1415,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
 					/* backslash quotes the next character, if any */
 					if (*str == '\\' && *(str + 1))
 						str++;
-					chlen = pg_mblen(str);
+					chlen = pg_mblen_cstr(str);
 					n->type = NODE_TYPE_CHAR;
 					memcpy(n->character, str, chlen);
 					n->character[chlen] = '\0';
@@ -1433,7 +1433,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
 				 */
 				if (*str == '\\' && *(str + 1) == '"')
 					str++;
-				chlen = pg_mblen(str);
+				chlen = pg_mblen_cstr(str);
 
 				if ((flags & DCH_FLAG) && is_separator_char(str))
 					n->type = NODE_TYPE_SEPARATOR;
@@ -2138,8 +2138,8 @@ asc_toupper_z(const char *buff)
 	do { \
 		if (S_THth(_suf)) \
 		{ \
-			if (*(ptr)) (ptr) += pg_mblen(ptr); \
-			if (*(ptr)) (ptr) += pg_mblen(ptr); \
+			if (*(ptr)) (ptr) += pg_mblen_cstr(ptr); \
+			if (*(ptr)) (ptr) += pg_mblen_cstr(ptr); \
 		} \
 	} while (0)
 
@@ -3345,7 +3345,7 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
 				 * insist that the consumed character match the format's
 				 * character.
 				 */
-				s += pg_mblen(s);
+				s += pg_mblen_cstr(s);
 			}
 			continue;
 		}
@@ -3367,11 +3367,11 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
 				if (extra_skip > 0)
 					extra_skip--;
 				else
-					s += pg_mblen(s);
+					s += pg_mblen_cstr(s);
 			}
 			else
 			{
-				int			chlen = pg_mblen(s);
+				int			chlen = pg_mblen_cstr(s);
 
 				/*
 				 * Standard mode requires strict match of format characters.
@@ -5615,13 +5615,15 @@ NUM_numpart_to_char(NUMProc *Np, int id)
 static void
 NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
 {
+	const char *end = Np->inout + input_len;
+
 	while (n-- > 0)
 	{
 		if (OVERLOAD_TEST)
 			break;				/* end of input */
 		if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
 			break;				/* it's a data character */
-		Np->inout_p += pg_mblen(Np->inout_p);
+		Np->inout_p += pg_mblen_range(Np->inout_p, end);
 	}
 }
 
@@ -6074,7 +6076,7 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
 			}
 			else
 			{
-				Np->inout_p += pg_mblen(Np->inout_p);
+				Np->inout_p += pg_mblen_range(Np->inout_p, Np->inout + input_len);
 			}
 			continue;
 		}
diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c
index 44d1c7ad0c4..f9a08257ac3 100644
--- a/src/backend/utils/adt/int.c
+++ b/src/backend/utils/adt/int.c
@@ -134,6 +134,30 @@ buildint2vector(const int16 *int2s, int n)
 	return result;
 }
 
+/*
+ * validate that an array object meets the restrictions of int2vector
+ *
+ * We need this because there are pathways by which a general int2[] array can
+ * be cast to int2vector, allowing the type's restrictions to be violated.
+ * All code that receives an int2vector as a SQL parameter should check this.
+ */
+static void
+check_valid_int2vector(const int2vector *int2Array)
+{
+	/*
+	 * We insist on ndim == 1 and dataoffset == 0 (that is, no nulls) because
+	 * otherwise the array's layout will not be what calling code expects.  We
+	 * needn't be picky about the index lower bound though.  Checking elemtype
+	 * is just paranoia.
+	 */
+	if (int2Array->ndim != 1 ||
+		int2Array->dataoffset != 0 ||
+		int2Array->elemtype != INT2OID)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATATYPE_MISMATCH),
+				 errmsg("array is not a valid int2vector")));
+}
+
 /*
  *		int2vectorin			- converts "num num ..." to internal form
  */
@@ -208,10 +232,14 @@ int2vectorout(PG_FUNCTION_ARGS)
 {
 	int2vector *int2Array = (int2vector *) PG_GETARG_POINTER(0);
 	int			num,
-				nnums = int2Array->dim1;
+				nnums;
 	char	   *rp;
 	char	   *result;
 
+	/* validate input before fetching dim1 */
+	check_valid_int2vector(int2Array);
+	nnums = int2Array->dim1;
+
 	/* assumes sign, 5 digits, ' ' */
 	rp = result = (char *) palloc(nnums * 7 + 1);
 	for (num = 0; num < nnums; num++)
@@ -272,6 +300,7 @@ int2vectorrecv(PG_FUNCTION_ARGS)
 Datum
 int2vectorsend(PG_FUNCTION_ARGS)
 {
+	/* We don't do check_valid_int2vector, since array_send won't care */
 	return array_send(fcinfo);
 }
 
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 70cb922e6b7..42b886c621a 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -682,7 +682,7 @@ report_json_context(JsonLexContext *lex)
 	{
 		/* Advance to next multibyte character */
 		if (IS_HIGHBIT_SET(*context_start))
-			context_start += pg_mblen(context_start);
+			context_start += pg_mblen_range(context_start, context_end);
 		else
 			context_start++;
 	}
diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y
index adc259d5bf8..c1880c113b8 100644
--- a/src/backend/utils/adt/jsonpath_gram.y
+++ b/src/backend/utils/adt/jsonpath_gram.y
@@ -527,7 +527,8 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
 						(errcode(ERRCODE_SYNTAX_ERROR),
 						 errmsg("invalid input syntax for type %s", "jsonpath"),
 						 errdetail("Unrecognized flag character \"%.*s\" in LIKE_REGEX predicate.",
-								   pg_mblen(flags->val + i), flags->val + i)));
+								   pg_mblen_range(flags->val + i, flags->val + flags->len),
+								   flags->val + i)));
 				break;
 		}
 	}
diff --git a/src/backend/utils/adt/levenshtein.c b/src/backend/utils/adt/levenshtein.c
index 0763daf0e83..9a84c8d0fc4 100644
--- a/src/backend/utils/adt/levenshtein.c
+++ b/src/backend/utils/adt/levenshtein.c
@@ -83,6 +83,8 @@ varstr_levenshtein(const char *source, int slen,
 	int		   *s_char_len = NULL;
 	int			j;
 	const char *y;
+	const char *send = source + slen;
+	const char *tend = target + tlen;
 
 	/*
 	 * For varstr_levenshtein_less_equal, we have real variables called
@@ -183,10 +185,10 @@ varstr_levenshtein(const char *source, int slen,
 #endif
 
 	/*
-	 * In order to avoid calling pg_mblen() repeatedly on each character in s,
-	 * we cache all the lengths before starting the main loop -- but if all
-	 * the characters in both strings are single byte, then we skip this and
-	 * use a fast-path in the main loop.  If only one string contains
+	 * In order to avoid calling pg_mblen_range() repeatedly on each character
+	 * in s, we cache all the lengths before starting the main loop -- but if
+	 * all the characters in both strings are single byte, then we skip this
+	 * and use a fast-path in the main loop.  If only one string contains
 	 * multi-byte characters, we still build the array, so that the fast-path
 	 * needn't deal with the case where the array hasn't been initialized.
 	 */
@@ -198,7 +200,7 @@ varstr_levenshtein(const char *source, int slen,
 		s_char_len = (int *) palloc((m + 1) * sizeof(int));
 		for (i = 0; i < m; ++i)
 		{
-			s_char_len[i] = pg_mblen(cp);
+			s_char_len[i] = pg_mblen_range(cp, send);
 			cp += s_char_len[i];
 		}
 		s_char_len[i] = 0;
@@ -224,7 +226,7 @@ varstr_levenshtein(const char *source, int slen,
 	{
 		int		   *temp;
 		const char *x = source;
-		int			y_char_len = n != tlen + 1 ? pg_mblen(y) : 1;
+		int			y_char_len = n != tlen + 1 ? pg_mblen_range(y, tend) : 1;
 		int			i;
 
 #ifdef LEVENSHTEIN_LESS_EQUAL
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 33a2f46aab0..776112c695f 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -55,20 +55,20 @@ static int	Generic_Text_IC_like(text *str, text *pat, Oid collation);
  *--------------------
  */
 static inline int
-wchareq(const char *p1, const char *p2)
+wchareq(const char *p1, int p1len, const char *p2, int p2len)
 {
-	int			p1_len;
+	int			p1clen;
 
 	/* Optimization:  quickly compare the first byte. */
 	if (*p1 != *p2)
 		return 0;
 
-	p1_len = pg_mblen(p1);
-	if (pg_mblen(p2) != p1_len)
+	p1clen = pg_mblen_with_len(p1, p1len);
+	if (pg_mblen_with_len(p2, p2len) != p1clen)
 		return 0;
 
 	/* They are the same length */
-	while (p1_len--)
+	while (p1clen--)
 	{
 		if (*p1++ != *p2++)
 			return 0;
@@ -107,11 +107,11 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
 #define NextByte(p, plen)	((p)++, (plen)--)
 
 /* Set up to compile like_match.c for multibyte characters */
-#define CHAREQ(p1, p2) wchareq((p1), (p2))
+#define CHAREQ(p1, p1len, p2, p2len) wchareq((p1), (p1len), (p2), (p2len))
 #define NextChar(p, plen) \
-	do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
+	do { int __l = pg_mblen_with_len((p), (plen)); (p) +=__l; (plen) -=__l; } while (0)
 #define CopyAdvChar(dst, src, srclen) \
-	do { int __l = pg_mblen(src); \
+	do { int __l = pg_mblen_with_len((src), (srclen)); \
 		 (srclen) -= __l; \
 		 while (__l-- > 0) \
 			 *(dst)++ = *(src)++; \
@@ -123,7 +123,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
 #include "like_match.c"
 
 /* Set up to compile like_match.c for single-byte characters */
-#define CHAREQ(p1, p2) (*(p1) == *(p2))
+#define CHAREQ(p1, p1len, p2, p2len) (*(p1) == *(p2))
 #define NextChar(p, plen) NextByte((p), (plen))
 #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
 
diff --git a/src/backend/utils/adt/like_match.c b/src/backend/utils/adt/like_match.c
index 8b2dff6d6e2..e586de9efd1 100644
--- a/src/backend/utils/adt/like_match.c
+++ b/src/backend/utils/adt/like_match.c
@@ -294,6 +294,7 @@ do_like_escape(text *pat, text *esc)
 					 errhint("Escape string must be empty or one character.")));
 
 		e = VARDATA_ANY(esc);
+		elen = VARSIZE_ANY_EXHDR(esc);
 
 		/*
 		 * If specified escape is '\', just copy the pattern as-is.
@@ -312,7 +313,7 @@ do_like_escape(text *pat, text *esc)
 		afterescape = false;
 		while (plen > 0)
 		{
-			if (CHAREQ(p, e) && !afterescape)
+			if (CHAREQ(p, plen, e, elen) && !afterescape)
 			{
 				*r++ = '\\';
 				NextChar(p, plen);
diff --git a/src/backend/utils/adt/network_selfuncs.c b/src/backend/utils/adt/network_selfuncs.c
index a8c0f954dfa..192df1c569d 100644
--- a/src/backend/utils/adt/network_selfuncs.c
+++ b/src/backend/utils/adt/network_selfuncs.c
@@ -43,9 +43,9 @@
 /* Maximum number of items to consider in join selectivity calculations */
 #define MAX_CONSIDERED_ELEMS 1024
 
-static Selectivity networkjoinsel_inner(Oid operator,
+static Selectivity networkjoinsel_inner(Oid operator, int opr_codenum,
 										VariableStatData *vardata1, VariableStatData *vardata2);
-static Selectivity networkjoinsel_semi(Oid operator,
+static Selectivity networkjoinsel_semi(Oid operator, int opr_codenum,
 									   VariableStatData *vardata1, VariableStatData *vardata2);
 static Selectivity mcv_population(float4 *mcv_numbers, int mcv_nvalues);
 static Selectivity inet_hist_value_sel(Datum *values, int nvalues,
@@ -82,6 +82,7 @@ networksel(PG_FUNCTION_ARGS)
 	Oid			operator = PG_GETARG_OID(1);
 	List	   *args = (List *) PG_GETARG_POINTER(2);
 	int			varRelid = PG_GETARG_INT32(3);
+	int			opr_codenum;
 	VariableStatData vardata;
 	Node	   *other;
 	bool		varonleft;
@@ -95,6 +96,14 @@ networksel(PG_FUNCTION_ARGS)
 				nullfrac;
 	FmgrInfo	proc;
 
+	/*
+	 * Before all else, verify that the operator is one of the ones supported
+	 * by this function, which in turn proves that the input datatypes are
+	 * what we expect.  Otherwise, attaching this selectivity function to some
+	 * unexpected operator could cause trouble.
+	 */
+	opr_codenum = inet_opr_codenum(operator);
+
 	/*
 	 * If expression is not (variable op something) or (something op
 	 * variable), then punt and return a default estimate.
@@ -150,13 +159,12 @@ networksel(PG_FUNCTION_ARGS)
 						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
 						 ATTSTATSSLOT_VALUES))
 	{
-		int			opr_codenum = inet_opr_codenum(operator);
+		int			h_codenum;
 
 		/* Commute if needed, so we can consider histogram to be on the left */
-		if (!varonleft)
-			opr_codenum = -opr_codenum;
+		h_codenum = varonleft ? opr_codenum : -opr_codenum;
 		non_mcv_selec = inet_hist_value_sel(hslot.values, hslot.nvalues,
-											constvalue, opr_codenum);
+											constvalue, h_codenum);
 
 		free_attstatsslot(&hslot);
 	}
@@ -203,10 +211,19 @@ networkjoinsel(PG_FUNCTION_ARGS)
 #endif
 	SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
 	double		selec;
+	int			opr_codenum;
 	VariableStatData vardata1;
 	VariableStatData vardata2;
 	bool		join_is_reversed;
 
+	/*
+	 * Before all else, verify that the operator is one of the ones supported
+	 * by this function, which in turn proves that the input datatypes are
+	 * what we expect.  Otherwise, attaching this selectivity function to some
+	 * unexpected operator could cause trouble.
+	 */
+	opr_codenum = inet_opr_codenum(operator);
+
 	get_join_variables(root, args, sjinfo,
 					   &vardata1, &vardata2, &join_is_reversed);
 
@@ -220,16 +237,19 @@ networkjoinsel(PG_FUNCTION_ARGS)
 			 * Selectivity for left/full join is not exactly the same as inner
 			 * join, but we neglect the difference, as eqjoinsel does.
 			 */
-			selec = networkjoinsel_inner(operator, &vardata1, &vardata2);
+			selec = networkjoinsel_inner(operator, opr_codenum,
+										 &vardata1, &vardata2);
 			break;
 		case JOIN_SEMI:
 		case JOIN_ANTI:
 		case JOIN_LASJ_NOTIN:
 			/* Here, it's important that we pass the outer var on the left. */
 			if (!join_is_reversed)
-				selec = networkjoinsel_semi(operator, &vardata1, &vardata2);
+				selec = networkjoinsel_semi(operator, opr_codenum,
+											&vardata1, &vardata2);
 			else
 				selec = networkjoinsel_semi(get_commutator(operator),
+											-opr_codenum,
 											&vardata2, &vardata1);
 			break;
 		default:
@@ -261,7 +281,7 @@ networkjoinsel(PG_FUNCTION_ARGS)
  * Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner().
  */
 static Selectivity
-networkjoinsel_inner(Oid operator,
+networkjoinsel_inner(Oid operator, int opr_codenum,
 					 VariableStatData *vardata1, VariableStatData *vardata2)
 {
 	Form_pg_statistic stats;
@@ -274,7 +294,6 @@ networkjoinsel_inner(Oid operator,
 				mcv2_exists = false,
 				hist1_exists = false,
 				hist2_exists = false;
-	int			opr_codenum;
 	int			mcv1_length = 0,
 				mcv2_length = 0;
 	AttStatsSlot mcv1_slot;
@@ -326,8 +345,6 @@ networkjoinsel_inner(Oid operator,
 		memset(&hist2_slot, 0, sizeof(hist2_slot));
 	}
 
-	opr_codenum = inet_opr_codenum(operator);
-
 	/*
 	 * Calculate selectivity for MCV vs MCV matches.
 	 */
@@ -388,7 +405,7 @@ networkjoinsel_inner(Oid operator,
  * histogram selectivity for semi/anti join cases.
  */
 static Selectivity
-networkjoinsel_semi(Oid operator,
+networkjoinsel_semi(Oid operator, int opr_codenum,
 					VariableStatData *vardata1, VariableStatData *vardata2)
 {
 	Form_pg_statistic stats;
@@ -402,7 +419,6 @@ networkjoinsel_semi(Oid operator,
 				mcv2_exists = false,
 				hist1_exists = false,
 				hist2_exists = false;
-	int			opr_codenum;
 	FmgrInfo	proc;
 	int			i,
 				mcv1_length = 0,
@@ -456,7 +472,6 @@ networkjoinsel_semi(Oid operator,
 		memset(&hist2_slot, 0, sizeof(hist2_slot));
 	}
 
-	opr_codenum = inet_opr_codenum(operator);
 	fmgr_info(get_opcode(operator), &proc);
 
 	/* Estimate number of input rows represented by RHS histogram. */
@@ -828,6 +843,9 @@ inet_semi_join_sel(Datum lhs_value,
 /*
  * Assign useful code numbers for the subnet inclusion/overlap operators
  *
+ * This will throw an error if the operator is not one of the ones we
+ * support in networksel() and networkjoinsel().
+ *
  * Only inet_masklen_inclusion_cmp() and inet_hist_match_divider() depend
  * on the exact codes assigned here; but many other places in this file
  * know that they can negate a code to obtain the code for the commutator
diff --git a/src/backend/utils/adt/oid.c b/src/backend/utils/adt/oid.c
index 3f7af5b3a06..066511443cf 100644
--- a/src/backend/utils/adt/oid.c
+++ b/src/backend/utils/adt/oid.c
@@ -106,6 +106,30 @@ buildoidvector(const Oid *oids, int n)
 	return result;
 }
 
+/*
+ * validate that an array object meets the restrictions of oidvector
+ *
+ * We need this because there are pathways by which a general oid[] array can
+ * be cast to oidvector, allowing the type's restrictions to be violated.
+ * All code that receives an oidvector as a SQL parameter should check this.
+ */
+void
+check_valid_oidvector(const oidvector *oidArray)
+{
+	/*
+	 * We insist on ndim == 1 and dataoffset == 0 (that is, no nulls) because
+	 * otherwise the array's layout will not be what calling code expects.  We
+	 * needn't be picky about the index lower bound though.  Checking elemtype
+	 * is just paranoia.
+	 */
+	if (oidArray->ndim != 1 ||
+		oidArray->dataoffset != 0 ||
+		oidArray->elemtype != OIDOID)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATATYPE_MISMATCH),
+				 errmsg("array is not a valid oidvector")));
+}
+
 /*
  *		oidvectorin			- converts "num num ..." to internal form
  */
@@ -158,10 +182,14 @@ oidvectorout(PG_FUNCTION_ARGS)
 {
 	oidvector  *oidArray = (oidvector *) PG_GETARG_POINTER(0);
 	int			num,
-				nnums = oidArray->dim1;
+				nnums;
 	char	   *rp;
 	char	   *result;
 
+	/* validate input before fetching dim1 */
+	check_valid_oidvector(oidArray);
+	nnums = oidArray->dim1;
+
 	/* assumes sign, 10 digits, ' ' */
 	rp = result = (char *) palloc(nnums * 12 + 1);
 	for (num = 0; num < nnums; num++)
@@ -224,6 +252,7 @@ oidvectorrecv(PG_FUNCTION_ARGS)
 Datum
 oidvectorsend(PG_FUNCTION_ARGS)
 {
+	/* We don't do check_valid_oidvector, since array_send won't care */
 	return array_send(fcinfo);
 }
 
diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c
index 3b5b794afb3..8d025011e2f 100644
--- a/src/backend/utils/adt/oracle_compat.c
+++ b/src/backend/utils/adt/oracle_compat.c
@@ -153,8 +153,8 @@ lpad(PG_FUNCTION_ARGS)
 	char	   *ptr1,
 			   *ptr2,
 			   *ptr2start,
-			   *ptr2end,
 			   *ptr_ret;
+	const char *ptr2end;
 	int			m,
 				s1len,
 				s2len;
@@ -199,7 +199,7 @@ lpad(PG_FUNCTION_ARGS)
 
 	while (m--)
 	{
-		int			mlen = pg_mblen(ptr2);
+		int			mlen = pg_mblen_range(ptr2, ptr2end);
 
 		memcpy(ptr_ret, ptr2, mlen);
 		ptr_ret += mlen;
@@ -212,7 +212,7 @@ lpad(PG_FUNCTION_ARGS)
 
 	while (s1len--)
 	{
-		int			mlen = pg_mblen(ptr1);
+		int			mlen = pg_mblen_unbounded(ptr1);
 
 		memcpy(ptr_ret, ptr1, mlen);
 		ptr_ret += mlen;
@@ -251,8 +251,8 @@ rpad(PG_FUNCTION_ARGS)
 	char	   *ptr1,
 			   *ptr2,
 			   *ptr2start,
-			   *ptr2end,
 			   *ptr_ret;
+	const char *ptr2end;
 	int			m,
 				s1len,
 				s2len;
@@ -292,11 +292,12 @@ rpad(PG_FUNCTION_ARGS)
 	m = len - s1len;
 
 	ptr1 = VARDATA_ANY(string1);
+
 	ptr_ret = VARDATA(ret);
 
 	while (s1len--)
 	{
-		int			mlen = pg_mblen(ptr1);
+		int			mlen = pg_mblen_unbounded(ptr1);
 
 		memcpy(ptr_ret, ptr1, mlen);
 		ptr_ret += mlen;
@@ -308,7 +309,7 @@ rpad(PG_FUNCTION_ARGS)
 
 	while (m--)
 	{
-		int			mlen = pg_mblen(ptr2);
+		int			mlen = pg_mblen_range(ptr2, ptr2end);
 
 		memcpy(ptr_ret, ptr2, mlen);
 		ptr_ret += mlen;
@@ -393,6 +394,7 @@ dotrim(const char *string, int stringlen,
 			 */
 			const char **stringchars;
 			const char **setchars;
+			const char *setend;
 			int		   *stringmblen;
 			int		   *setmblen;
 			int			stringnchars;
@@ -400,6 +402,7 @@ dotrim(const char *string, int stringlen,
 			int			resultndx;
 			int			resultnchars;
 			const char *p;
+			const char *pend;
 			int			len;
 			int			mblen;
 			const char *str_pos;
@@ -410,10 +413,11 @@ dotrim(const char *string, int stringlen,
 			stringnchars = 0;
 			p = string;
 			len = stringlen;
+			pend = p + len;
 			while (len > 0)
 			{
 				stringchars[stringnchars] = p;
-				stringmblen[stringnchars] = mblen = pg_mblen(p);
+				stringmblen[stringnchars] = mblen = pg_mblen_range(p, pend);
 				stringnchars++;
 				p += mblen;
 				len -= mblen;
@@ -424,10 +428,11 @@ dotrim(const char *string, int stringlen,
 			setnchars = 0;
 			p = set;
 			len = setlen;
+			setend = set + setlen;
 			while (len > 0)
 			{
 				setchars[setnchars] = p;
-				setmblen[setnchars] = mblen = pg_mblen(p);
+				setmblen[setnchars] = mblen = pg_mblen_range(p, setend);
 				setnchars++;
 				p += mblen;
 				len -= mblen;
@@ -805,6 +810,8 @@ translate(PG_FUNCTION_ARGS)
 			   *to_end;
 	char	   *source,
 			   *target;
+	const char *source_end;
+	const char *from_end;
 	int			m,
 				fromlen,
 				tolen,
@@ -819,9 +826,11 @@ translate(PG_FUNCTION_ARGS)
 	if (m <= 0)
 		PG_RETURN_TEXT_P(string);
 	source = VARDATA_ANY(string);
+	source_end = source + m;
 
 	fromlen = VARSIZE_ANY_EXHDR(from);
 	from_ptr = VARDATA_ANY(from);
+	from_end = from_ptr + fromlen;
 	tolen = VARSIZE_ANY_EXHDR(to);
 	to_ptr = VARDATA_ANY(to);
 	to_end = to_ptr + tolen;
@@ -845,12 +854,12 @@ translate(PG_FUNCTION_ARGS)
 
 	while (m > 0)
 	{
-		source_len = pg_mblen(source);
+		source_len = pg_mblen_range(source, source_end);
 		from_index = 0;
 
 		for (i = 0; i < fromlen; i += len)
 		{
-			len = pg_mblen(&from_ptr[i]);
+			len = pg_mblen_range(&from_ptr[i], from_end);
 			if (len == source_len &&
 				memcmp(source, &from_ptr[i], len) == 0)
 				break;
@@ -866,11 +875,11 @@ translate(PG_FUNCTION_ARGS)
 			{
 				if (p >= to_end)
 					break;
-				p += pg_mblen(p);
+				p += pg_mblen_range(p, to_end);
 			}
 			if (p < to_end)
 			{
-				len = pg_mblen(p);
+				len = pg_mblen_range(p, to_end);
 				memcpy(target, p, len);
 				target += len;
 				retlen += len;
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 702cd52b6d4..d43a0577ee4 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -444,7 +444,7 @@ parse_re_flags(pg_re_flags *flags, text *opts)
 					ereport(ERROR,
 							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 							 errmsg("invalid regular expression option: \"%.*s\"",
-									pg_mblen(opt_p + i), opt_p + i)));
+									pg_mblen_range(opt_p + i, opt_p + opt_len), opt_p + i)));
 					break;
 			}
 		}
@@ -674,12 +674,13 @@ textregexreplace(PG_FUNCTION_ARGS)
 	if (VARSIZE_ANY_EXHDR(opt) > 0)
 	{
 		char	   *opt_p = VARDATA_ANY(opt);
+		const char *end_p = opt_p + VARSIZE_ANY_EXHDR(opt);
 
 		if (*opt_p >= '0' && *opt_p <= '9')
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 					 errmsg("invalid regular expression option: \"%.*s\"",
-							pg_mblen(opt_p), opt_p),
+							pg_mblen_range(opt_p, end_p), opt_p),
 					 errhint("If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly.")));
 	}
 
@@ -773,12 +774,14 @@ similar_escape_internal(text *pat_text, text *esc_text)
 			   *r;
 	int			plen,
 				elen;
+	const char *pend;
 	bool		afterescape = false;
 	bool		incharclass = false;
 	int			nquotes = 0;
 
 	p = VARDATA_ANY(pat_text);
 	plen = VARSIZE_ANY_EXHDR(pat_text);
+	pend = p + plen;
 	if (esc_text == NULL)
 	{
 		/* No ESCAPE clause provided; default to backslash as escape */
@@ -867,7 +870,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
 
 		if (elen > 1)
 		{
-			int			mblen = pg_mblen(p);
+			int			mblen = pg_mblen_range(p, pend);
 
 			if (mblen > 1)
 			{
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index 001f2cc0299..244187e474b 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -120,7 +120,7 @@ get_modifiers(char *buf, int16 *weight, bool *prefix)
 		return buf;
 
 	buf++;
-	while (*buf && pg_mblen(buf) == 1)
+	while (*buf && pg_mblen_cstr(buf) == 1)
 	{
 		switch (*buf)
 		{
@@ -197,7 +197,7 @@ parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
 					continue;
 				}
 
-				if (!t_isdigit(ptr))
+				if (!t_isdigit_cstr(ptr))
 					return false;
 
 				errno = 0;
@@ -259,12 +259,12 @@ parse_or_operator(TSQueryParserState pstate)
 		return false;
 
 	/* it shouldn't be a part of any word */
-	if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum(ptr))
+	if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum_cstr(ptr))
 		return false;
 
 	for (;;)
 	{
-		ptr += pg_mblen(ptr);
+		ptr += pg_mblen_cstr(ptr);
 
 		if (*ptr == '\0')		/* got end of string without operand */
 			return false;
@@ -274,7 +274,7 @@ parse_or_operator(TSQueryParserState pstate)
 		 * So we still treat OR literal as operation with possibly incorrect
 		 * operand and will not search it as lexeme
 		 */
-		if (!t_isspace(ptr))
+		if (!t_isspace_cstr(ptr))
 			break;
 	}
 
@@ -315,7 +315,7 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
 					/* generic syntax error message is fine */
 					return PT_ERR;
 				}
-				else if (!t_isspace(state->buf))
+				else if (!t_isspace_cstr(state->buf))
 				{
 					/*
 					 * We rely on the tsvector parser to parse the value for
@@ -383,14 +383,14 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
 				{
 					return (state->count) ? PT_ERR : PT_END;
 				}
-				else if (!t_isspace(state->buf))
+				else if (!t_isspace_cstr(state->buf))
 				{
 					return PT_ERR;
 				}
 				break;
 		}
 
-		state->buf += pg_mblen(state->buf);
+		state->buf += pg_mblen_cstr(state->buf);
 	}
 }
 
@@ -444,7 +444,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
 					state->state = WAITOPERAND;
 					continue;
 				}
-				else if (!t_isspace(state->buf))
+				else if (!t_isspace_cstr(state->buf))
 				{
 					/*
 					 * We rely on the tsvector parser to parse the value for
@@ -492,7 +492,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
 					state->buf++;
 					continue;
 				}
-				else if (!t_isspace(state->buf))
+				else if (!t_isspace_cstr(state->buf))
 				{
 					/* insert implicit AND between operands */
 					state->state = WAITOPERAND;
@@ -502,7 +502,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
 				break;
 		}
 
-		state->buf += pg_mblen(state->buf);
+		state->buf += pg_mblen_cstr(state->buf);
 	}
 }
 
@@ -1014,9 +1014,8 @@ infix(INFIX *in, int parentPriority, bool rightPhraseOp)
 				*(in->cur) = '\\';
 				in->cur++;
 			}
-			COPYCHAR(in->cur, op);
 
-			clen = pg_mblen(op);
+			clen = ts_copychar_cstr(in->cur, op);
 			op += clen;
 			in->cur += clen;
 		}
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
index 85c492d122a..39e16f8a7cd 100644
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -320,9 +320,9 @@ tsvectorout(PG_FUNCTION_ARGS)
 				lenbuf = 0,
 				pp;
 	WordEntry  *ptr = ARRPTR(out);
-	char	   *curbegin,
-			   *curin,
+	char	   *curin,
 			   *curout;
+	const char *curend;
 
 	lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
 	for (i = 0; i < out->size; i++)
@@ -335,13 +335,14 @@ tsvectorout(PG_FUNCTION_ARGS)
 	curout = outbuf = (char *) palloc(lenbuf);
 	for (i = 0; i < out->size; i++)
 	{
-		curbegin = curin = STRPTR(out) + ptr->pos;
+		curin = STRPTR(out) + ptr->pos;
+		curend = curin + ptr->len;
 		if (i != 0)
 			*curout++ = ' ';
 		*curout++ = '\'';
-		while (curin - curbegin < ptr->len)
+		while (curin < curend)
 		{
-			int			len = pg_mblen(curin);
+			int			len = pg_mblen_range(curin, curend);
 
 			if (t_iseq(curin, '\''))
 				*curout++ = '\'';
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index f511a28bb04..ae90e750604 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -2615,11 +2615,15 @@ ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
 	if (ws)
 	{
 		char	   *buf;
+		const char *end;
 
 		buf = VARDATA_ANY(ws);
-		while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
+		end = buf + VARSIZE_ANY_EXHDR(ws);
+		while (buf < end)
 		{
-			if (pg_mblen(buf) == 1)
+			int			len = pg_mblen_range(buf, end);
+
+			if (len == 1)
 			{
 				switch (*buf)
 				{
@@ -2643,7 +2647,7 @@ ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
 						stat->weight |= 0;
 				}
 			}
-			buf += pg_mblen(buf);
+			buf += len;
 		}
 	}
 
diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c
index 13e075831fe..e4b91f8d3c4 100644
--- a/src/backend/utils/adt/tsvector_parser.c
+++ b/src/backend/utils/adt/tsvector_parser.c
@@ -206,10 +206,9 @@ gettoken_tsvector(TSVectorParseState state,
 			else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
 					 (state->is_web && t_iseq(state->prsbuf, '"')))
 				PRSSYNTAXERROR;
-			else if (!t_isspace(state->prsbuf))
+			else if (!t_isspace_cstr(state->prsbuf))
 			{
-				COPYCHAR(curpos, state->prsbuf);
-				curpos += pg_mblen(state->prsbuf);
+				curpos += ts_copychar_cstr(curpos, state->prsbuf);
 				statecode = WAITENDWORD;
 			}
 		}
@@ -223,8 +222,7 @@ gettoken_tsvector(TSVectorParseState state,
 			else
 			{
 				RESIZEPRSBUF;
-				COPYCHAR(curpos, state->prsbuf);
-				curpos += pg_mblen(state->prsbuf);
+				curpos += ts_copychar_cstr(curpos, state->prsbuf);
 				Assert(oldstate != 0);
 				statecode = oldstate;
 			}
@@ -236,7 +234,7 @@ gettoken_tsvector(TSVectorParseState state,
 				statecode = WAITNEXTCHAR;
 				oldstate = WAITENDWORD;
 			}
-			else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
+			else if (t_isspace_cstr(state->prsbuf) || *(state->prsbuf) == '\0' ||
 					 (state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
 					 (state->is_web && t_iseq(state->prsbuf, '"')))
 			{
@@ -259,8 +257,7 @@ gettoken_tsvector(TSVectorParseState state,
 			else
 			{
 				RESIZEPRSBUF;
-				COPYCHAR(curpos, state->prsbuf);
-				curpos += pg_mblen(state->prsbuf);
+				curpos += ts_copychar_cstr(curpos, state->prsbuf);
 			}
 		}
 		else if (statecode == WAITENDCMPLX)
@@ -279,8 +276,7 @@ gettoken_tsvector(TSVectorParseState state,
 			else
 			{
 				RESIZEPRSBUF;
-				COPYCHAR(curpos, state->prsbuf);
-				curpos += pg_mblen(state->prsbuf);
+				curpos += ts_copychar_cstr(curpos, state->prsbuf);
 			}
 		}
 		else if (statecode == WAITCHARCMPLX)
@@ -288,8 +284,7 @@ gettoken_tsvector(TSVectorParseState state,
 			if (!state->is_web && t_iseq(state->prsbuf, '\''))
 			{
 				RESIZEPRSBUF;
-				COPYCHAR(curpos, state->prsbuf);
-				curpos += pg_mblen(state->prsbuf);
+				curpos += ts_copychar_cstr(curpos, state->prsbuf);
 				statecode = WAITENDCMPLX;
 			}
 			else
@@ -300,7 +295,7 @@ gettoken_tsvector(TSVectorParseState state,
 					PRSSYNTAXERROR;
 				if (state->oprisdelim)
 				{
-					/* state->prsbuf+=pg_mblen(state->prsbuf); */
+					/* state->prsbuf+=pg_mblen_cstr(state->prsbuf); */
 					RETURN_TOKEN;
 				}
 				else
@@ -317,7 +312,7 @@ gettoken_tsvector(TSVectorParseState state,
 		}
 		else if (statecode == INPOSINFO)
 		{
-			if (t_isdigit(state->prsbuf))
+			if (t_isdigit_cstr(state->prsbuf))
 			{
 				if (posalen == 0)
 				{
@@ -372,10 +367,10 @@ gettoken_tsvector(TSVectorParseState state,
 					PRSSYNTAXERROR;
 				WEP_SETWEIGHT(pos[npos - 1], 0);
 			}
-			else if (t_isspace(state->prsbuf) ||
+			else if (t_isspace_cstr(state->prsbuf) ||
 					 *(state->prsbuf) == '\0')
 				RETURN_TOKEN;
-			else if (!t_isdigit(state->prsbuf))
+			else if (!t_isdigit_cstr(state->prsbuf))
 				PRSSYNTAXERROR;
 		}
 		else					/* internal error */
@@ -383,6 +378,6 @@ gettoken_tsvector(TSVectorParseState state,
 				 statecode);
 
 		/* get next char */
-		state->prsbuf += pg_mblen(state->prsbuf);
+		state->prsbuf += pg_mblen_cstr(state->prsbuf);
 	}
 }
diff --git a/src/backend/utils/adt/varbit.c b/src/backend/utils/adt/varbit.c
index 7e1457cb9ef..c53356bbb46 100644
--- a/src/backend/utils/adt/varbit.c
+++ b/src/backend/utils/adt/varbit.c
@@ -233,7 +233,7 @@ bit_in(PG_FUNCTION_ARGS)
 				ereturn(escontext, (Datum) 0,
 						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 						 errmsg("\"%.*s\" is not a valid binary digit",
-								pg_mblen(sp), sp)));
+								pg_mblen_cstr(sp), sp)));
 
 			x >>= 1;
 			if (x == 0)
@@ -258,7 +258,7 @@ bit_in(PG_FUNCTION_ARGS)
 				ereturn(escontext, (Datum) 0,
 						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 						 errmsg("\"%.*s\" is not a valid hexadecimal digit",
-								pg_mblen(sp), sp)));
+								pg_mblen_cstr(sp), sp)));
 
 			if (bc)
 			{
@@ -534,7 +534,7 @@ varbit_in(PG_FUNCTION_ARGS)
 				ereturn(escontext, (Datum) 0,
 						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 						 errmsg("\"%.*s\" is not a valid binary digit",
-								pg_mblen(sp), sp)));
+								pg_mblen_cstr(sp), sp)));
 
 			x >>= 1;
 			if (x == 0)
@@ -559,7 +559,7 @@ varbit_in(PG_FUNCTION_ARGS)
 				ereturn(escontext, (Datum) 0,
 						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 						 errmsg("\"%.*s\" is not a valid hexadecimal digit",
-								pg_mblen(sp), sp)));
+								pg_mblen_cstr(sp), sp)));
 
 			if (bc)
 			{
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index b5f0018e8f3..1f8fcd1f406 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -797,8 +797,11 @@ text_catenate(text *t1, text *t2)
  * charlen_to_bytelen()
  *	Compute the number of bytes occupied by n characters starting at *p
  *
- * It is caller's responsibility that there actually are n characters;
- * the string need not be null-terminated.
+ * The caller shall ensure there are n complete characters.  Callers achieve
+ * this by deriving "n" from regmatch_t findings from searching a wchar array.
+ * pg_mb2wchar_with_len() skips any trailing incomplete character, so regex
+ * matches will end no later than the last complete character.  (The string
+ * need not be null-terminated.)
  */
 static int
 charlen_to_bytelen(const char *p, int n)
@@ -813,7 +816,7 @@ charlen_to_bytelen(const char *p, int n)
 		const char *s;
 
 		for (s = p; n > 0; n--)
-			s += pg_mblen(s);
+			s += pg_mblen_unbounded(s); /* caller verified encoding */
 
 		return s - p;
 	}
@@ -946,6 +949,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 		int32		slice_start;
 		int32		slice_size;
 		int32		slice_strlen;
+		int32		slice_len;
 		text	   *slice;
 		int32		E1;
 		int32		i;
@@ -1015,7 +1019,8 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 			slice = (text *) DatumGetPointer(str);
 
 		/* see if we got back an empty string */
-		if (VARSIZE_ANY_EXHDR(slice) == 0)
+		slice_len = VARSIZE_ANY_EXHDR(slice);
+		if (slice_len == 0)
 		{
 			if (slice != (text *) DatumGetPointer(str))
 				pfree(slice);
@@ -1024,7 +1029,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 
 		/* Now we can get the actual length of the slice in MB characters */
 		slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
-											VARSIZE_ANY_EXHDR(slice));
+											slice_len);
 
 		/*
 		 * Check that the start position wasn't > slice_strlen. If so, SQL99
@@ -1051,7 +1056,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 		 */
 		p = VARDATA_ANY(slice);
 		for (i = 0; i < S1 - 1; i++)
-			p += pg_mblen(p);
+			p += pg_mblen_unbounded(p);
 
 		/* hang onto a pointer to our start position */
 		s = p;
@@ -1061,7 +1066,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 		 * length.
 		 */
 		for (i = S1; i < E1; i++)
-			p += pg_mblen(p);
+			p += pg_mblen_unbounded(p);
 
 		ret = (text *) palloc(VARHDRSZ + (p - s));
 		SET_VARSIZE(ret, VARHDRSZ + (p - s));
@@ -1359,6 +1364,8 @@ text_position_next(TextPositionState *state)
 	 */
 	if (state->is_multibyte_char_in_char)
 	{
+		const char *haystack_end = state->str1 + state->len1;
+
 		/* Walk one character at a time, until we reach the match. */
 
 		/* the search should never move backwards. */
@@ -1367,7 +1374,7 @@ text_position_next(TextPositionState *state)
 		while (state->refpoint < matchptr)
 		{
 			/* step to next character. */
-			state->refpoint += pg_mblen(state->refpoint);
+			state->refpoint += pg_mblen_range(state->refpoint, haystack_end);
 			state->refpos++;
 
 			/*
@@ -4682,6 +4689,8 @@ split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
 	}
 	else
 	{
+		const char *end_ptr;
+
 		/*
 		 * When fldsep is NULL, each character in the input string becomes a
 		 * separate element in the result set.  The separator is effectively
@@ -4690,10 +4699,11 @@ split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
 		inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
 
 		start_ptr = VARDATA_ANY(inputstring);
+		end_ptr = start_ptr + inputstring_len;
 
 		while (inputstring_len > 0)
 		{
-			int			chunk_len = pg_mblen(start_ptr);
+			int			chunk_len = pg_mblen_range(start_ptr, end_ptr);
 
 			CHECK_FOR_INTERRUPTS();
 
@@ -5524,7 +5534,7 @@ text_reverse(PG_FUNCTION_ARGS)
 		{
 			int			sz;
 
-			sz = pg_mblen(p);
+			sz = pg_mblen_range(p, endp);
 			dst -= sz;
 			memcpy(dst, p, sz);
 			p += sz;
@@ -5685,7 +5695,7 @@ text_format(PG_FUNCTION_ARGS)
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 					 errmsg("unrecognized format() type specifier \"%.*s\"",
-							pg_mblen(cp), cp),
+							pg_mblen_range(cp, end_ptr), cp),
 					 errhint("For a single \"%%\" use \"%%%%\".")));
 
 		/* If indirect width was specified, get its value */
@@ -5806,7 +5816,7 @@ text_format(PG_FUNCTION_ARGS)
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 						 errmsg("unrecognized format() type specifier \"%.*s\"",
-								pg_mblen(cp), cp),
+								pg_mblen_range(cp, end_ptr), cp),
 						 errhint("For a single \"%%\" use \"%%%%\".")));
 				break;
 		}
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 1537adfb7bf..45132fcc0fa 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -2329,8 +2329,7 @@ sqlchar_to_unicode(const char *s)
 	char	   *utf8string;
 	pg_wchar	ret[2];			/* need space for trailing zero */
 
-	/* note we're not assuming s is null-terminated */
-	utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
+	utf8string = pg_server_to_any(s, pg_mblen_cstr(s), PG_UTF8);
 
 	pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
 								  pg_encoding_mblen(PG_UTF8, utf8string));
@@ -2383,7 +2382,7 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
 
 	initStringInfo(&buf);
 
-	for (p = ident; *p; p += pg_mblen(p))
+	for (p = ident; *p; p += pg_mblen_cstr(p))
 	{
 		if (*p == ':' && (p == ident || fully_escaped))
 			appendStringInfoString(&buf, "_x003A_");
@@ -2408,7 +2407,7 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
 				: !is_valid_xml_namechar(u))
 				appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
 			else
-				appendBinaryStringInfo(&buf, p, pg_mblen(p));
+				appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p));
 		}
 	}
 
@@ -2431,7 +2430,7 @@ map_xml_name_to_sql_identifier(const char *name)
 
 	initStringInfo(&buf);
 
-	for (p = name; *p; p += pg_mblen(p))
+	for (p = name; *p; p += pg_mblen_cstr(p))
 	{
 		if (*p == '_' && *(p + 1) == 'x'
 			&& isxdigit((unsigned char) *(p + 2))
@@ -2449,7 +2448,7 @@ map_xml_name_to_sql_identifier(const char *name)
 			p += 6;
 		}
 		else
-			appendBinaryStringInfo(&buf, p, pg_mblen(p));
+			appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p));
 	}
 
 	return buf.data;
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 65c20bde39e..a8901a957eb 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -43,6 +43,7 @@
 #include "catalog/pg_directory_table.h"
 #include "catalog/pg_enum.h"
 #include "catalog/pg_event_trigger.h"
+#include "catalog/pg_extension.h"
 #include "catalog/pg_foreign_data_wrapper.h"
 #include "catalog/pg_foreign_server.h"
 #include "catalog/pg_foreign_table.h"
@@ -823,6 +824,13 @@ static const struct cachedesc cacheinfo[] = {
 			0
 		},
 		128
+	},
+	/* intentionally out of alphabetical order, to avoid an ABI break: */
+	[EXTENSIONOID] = {
+		ExtensionRelationId,
+		ExtensionOidIndexId,
+		KEY(Anum_pg_extension_oid),
+		2
 	}
 };
 
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 87ed364aab4..0477acc1e08 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -38,6 +38,7 @@
 #include "catalog/namespace.h"
 #include "mb/pg_wchar.h"
 #include "utils/builtins.h"
+#include "utils/memdebug.h"
 #include "utils/memutils.h"
 #include "utils/syscache.h"
 #include "varatt.h"
@@ -100,6 +101,13 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
 									FmgrInfo *custom_encoding_proc);
 static int	cliplen(const char *str, int len, int limit);
 
+pg_attribute_noreturn()
+static void report_invalid_encoding_int(int encoding, const char *mbstr,
+										int mblen, int len);
+
+pg_attribute_noreturn()
+static void report_invalid_encoding_db(const char *mbstr, int mblen, int len);
+
 
 /*
  * Prepare for a future call to SetClientEncoding.  Success should mean
@@ -1149,11 +1157,126 @@ pg_encoding_wchar2mb_with_len(int encoding,
 	return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
 }
 
-/* returns the byte length of a multibyte character */
+/*
+ * Returns the byte length of a multibyte character sequence in a
+ * null-terminated string.  Raises an illegal byte sequence error if the
+ * sequence would hit a null terminator.
+ *
+ * The caller is expected to have checked for a terminator at *mbstr == 0
+ * before calling, but some callers want 1 in that case, so this function
+ * continues that tradition.
+ *
+ * This must only be used for strings that have a null-terminator to enable
+ * bounds detection.
+ */
+int
+pg_mblen_cstr(const char *mbstr)
+{
+	int			length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
+
+	/*
+	 * The .mblen functions return 1 when given a pointer to a terminator.
+	 * Some callers depend on that, so we tolerate it for now.  Well-behaved
+	 * callers check the leading byte for a terminator *before* calling.
+	 */
+	for (int i = 1; i < length; ++i)
+		if (unlikely(mbstr[i] == 0))
+			report_invalid_encoding_db(mbstr, length, i);
+
+	/*
+	 * String should be NUL-terminated, but checking that would make typical
+	 * callers O(N^2), tripling Valgrind check-world time.  Unless
+	 * VALGRIND_EXPENSIVE, check 1 byte after each actual character.  (If we
+	 * found a character, not a terminator, the next byte must be a terminator
+	 * or the start of the next character.)  If the caller iterates the whole
+	 * string, the last call will diagnose a missing terminator.
+	 */
+	if (mbstr[0] != '\0')
+	{
+#ifdef VALGRIND_EXPENSIVE
+		VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, strlen(mbstr));
+#else
+		VALGRIND_CHECK_MEM_IS_DEFINED(mbstr + length, 1);
+#endif
+	}
+
+	return length;
+}
+
+/*
+ * Returns the byte length of a multibyte character sequence bounded by a range
+ * [mbstr, end) of at least one byte in size.  Raises an illegal byte sequence
+ * error if the sequence would exceed the range.
+ */
+int
+pg_mblen_range(const char *mbstr, const char *end)
+{
+	int			length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
+
+	Assert(end > mbstr);
+#ifdef VALGRIND_EXPENSIVE
+	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, end - mbstr);
+#else
+	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length);
+#endif
+
+	if (unlikely(mbstr + length > end))
+		report_invalid_encoding_db(mbstr, length, end - mbstr);
+
+	return length;
+}
+
+/*
+ * Returns the byte length of a multibyte character sequence bounded by a range
+ * extending for 'limit' bytes, which must be at least one.  Raises an illegal
+ * byte sequence error if the sequence would exceed the range.
+ */
+int
+pg_mblen_with_len(const char *mbstr, int limit)
+{
+	int			length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
+
+	Assert(limit >= 1);
+#ifdef VALGRIND_EXPENSIVE
+	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, limit);
+#else
+	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length);
+#endif
+
+	if (unlikely(length > limit))
+		report_invalid_encoding_db(mbstr, length, limit);
+
+	return length;
+}
+
+
+/*
+ * Returns the length of a multibyte character sequence, without any
+ * validation of bounds.
+ *
+ * PLEASE NOTE:  This function can only be used safely if the caller has
+ * already verified the input string, since otherwise there is a risk of
+ * overrunning the buffer if the string is invalid.  A prior call to a
+ * pg_mbstrlen* function suffices.
+ */
+int
+pg_mblen_unbounded(const char *mbstr)
+{
+	int			length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
+
+	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length);
+
+	return length;
+}
+
+/*
+ * Historical name for pg_mblen_unbounded().  Should not be used and will be
+ * removed in a later version.
+ */
 int
 pg_mblen(const char *mbstr)
 {
-	return pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
+	return pg_mblen_unbounded(mbstr);
 }
 
 /* returns the display length of a multibyte character */
@@ -1175,14 +1298,14 @@ pg_mbstrlen(const char *mbstr)
 
 	while (*mbstr)
 	{
-		mbstr += pg_mblen(mbstr);
+		mbstr += pg_mblen_cstr(mbstr);
 		len++;
 	}
 	return len;
 }
 
 /* returns the length (counted in wchars) of a multibyte string
- * (not necessarily NULL terminated)
+ * (stops at the first of "limit" or a NUL)
  */
 int
 pg_mbstrlen_with_len(const char *mbstr, int limit)
@@ -1195,7 +1318,7 @@ pg_mbstrlen_with_len(const char *mbstr, int limit)
 
 	while (limit > 0 && *mbstr)
 	{
-		int			l = pg_mblen(mbstr);
+		int			l = pg_mblen_with_len(mbstr, limit);
 
 		limit -= l;
 		mbstr += l;
@@ -1265,7 +1388,7 @@ pg_mbcharcliplen(const char *mbstr, int len, int limit)
 
 	while (len > 0 && *mbstr)
 	{
-		l = pg_mblen(mbstr);
+		l = pg_mblen_with_len(mbstr, len);
 		nch++;
 		if (nch > limit)
 			break;
@@ -1835,12 +1958,19 @@ void
 report_invalid_encoding(int encoding, const char *mbstr, int len)
 {
 	int			l = pg_encoding_mblen_or_incomplete(encoding, mbstr, len);
+
+	report_invalid_encoding_int(encoding, mbstr, l, len);
+}
+
+static void
+report_invalid_encoding_int(int encoding, const char *mbstr, int mblen, int len)
+{
 	char		buf[8 * 5 + 1];
 	char	   *p = buf;
 	int			j,
 				jlimit;
 
-	jlimit = Min(l, len);
+	jlimit = Min(mblen, len);
 	jlimit = Min(jlimit, 8);	/* prevent buffer overrun */
 
 	for (j = 0; j < jlimit; j++)
@@ -1857,6 +1987,12 @@ report_invalid_encoding(int encoding, const char *mbstr, int len)
 					buf)));
 }
 
+static void
+report_invalid_encoding_db(const char *mbstr, int mblen, int len)
+{
+	report_invalid_encoding_int(GetDatabaseEncoding(), mbstr, mblen, len);
+}
+
 /*
  * report_untranslatable_char: complain about untranslatable character
  *
diff --git a/src/common/wchar.c b/src/common/wchar.c
index c0fb19b3f1a..5133e5e5b25 100644
--- a/src/common/wchar.c
+++ b/src/common/wchar.c
@@ -82,6 +82,9 @@
  * subset to the ASCII routines to ensure consistency.
  */
 
+/* No error-reporting facility.  Ignore incomplete trailing byte sequence. */
+#define MB2CHAR_NEED_AT_LEAST(len, need) if ((len) < (need)) break
+
 /*
  * SQL/ASCII
  */
@@ -127,22 +130,24 @@ pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
 
 	while (len > 0 && *from)
 	{
-		if (*from == SS2 && len >= 2)	/* JIS X 0201 (so called "1 byte
-										 * KANA") */
+		if (*from == SS2)		/* JIS X 0201 (so called "1 byte KANA") */
 		{
+			MB2CHAR_NEED_AT_LEAST(len, 2);
 			from++;
 			*to = (SS2 << 8) | *from++;
 			len -= 2;
 		}
-		else if (*from == SS3 && len >= 3)	/* JIS X 0212 KANJI */
+		else if (*from == SS3)	/* JIS X 0212 KANJI */
 		{
+			MB2CHAR_NEED_AT_LEAST(len, 3);
 			from++;
 			*to = (SS3 << 16) | (*from++ << 8);
 			*to |= *from++;
 			len -= 3;
 		}
-		else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
+		else if (IS_HIGHBIT_SET(*from)) /* JIS X 0208 KANJI */
 		{
+			MB2CHAR_NEED_AT_LEAST(len, 2);
 			*to = *from++ << 8;
 			*to |= *from++;
 			len -= 2;
@@ -254,22 +259,25 @@ pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
 
 	while (len > 0 && *from)
 	{
-		if (*from == SS2 && len >= 3)	/* code set 2 (unused?) */
+		if (*from == SS2)		/* code set 2 (unused?) */
 		{
+			MB2CHAR_NEED_AT_LEAST(len, 3);
 			from++;
 			*to = (SS2 << 16) | (*from++ << 8);
 			*to |= *from++;
 			len -= 3;
 		}
-		else if (*from == SS3 && len >= 3)	/* code set 3 (unused ?) */
+		else if (*from == SS3)	/* code set 3 (unused ?) */
 		{
+			MB2CHAR_NEED_AT_LEAST(len, 3);
 			from++;
 			*to = (SS3 << 16) | (*from++ << 8);
 			*to |= *from++;
 			len -= 3;
 		}
-		else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
+		else if (IS_HIGHBIT_SET(*from)) /* code set 1 */
 		{
+			MB2CHAR_NEED_AT_LEAST(len, 2);
 			*to = *from++ << 8;
 			*to |= *from++;
 			len -= 2;
@@ -286,12 +294,22 @@ pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
 	return cnt;
 }
 
+/*
+ * mbverifychar does not accept SS2 or SS3 (CS2 and CS3 are not defined for
+ * EUC_CN), but mb2wchar_with_len does.  Tell a coherent story for code that
+ * relies on agreement between mb2wchar_with_len and mblen.  Invalid text
+ * datums (e.g. from shared catalogs) reach this.
+ */
 static int
 pg_euccn_mblen(const unsigned char *s)
 {
 	int			len;
 
-	if (IS_HIGHBIT_SET(*s))
+	if (*s == SS2)
+		len = 3;
+	else if (*s == SS3)
+		len = 3;
+	else if (IS_HIGHBIT_SET(*s))
 		len = 2;
 	else
 		len = 1;
@@ -321,23 +339,26 @@ pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
 
 	while (len > 0 && *from)
 	{
-		if (*from == SS2 && len >= 4)	/* code set 2 */
+		if (*from == SS2)		/* code set 2 */
 		{
+			MB2CHAR_NEED_AT_LEAST(len, 4);
 			from++;
 			*to = (((uint32) SS2) << 24) | (*from++ << 16);
 			*to |= *from++ << 8;
 			*to |= *from++;
 			len -= 4;
 		}
-		else if (*from == SS3 && len >= 3)	/* code set 3 (unused?) */
+		else if (*from == SS3)	/* code set 3 (unused?) */
 		{
+			MB2CHAR_NEED_AT_LEAST(len, 3);
 			from++;
 			*to = (SS3 << 16) | (*from++ << 8);
 			*to |= *from++;
 			len -= 3;
 		}
-		else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */
+		else if (IS_HIGHBIT_SET(*from)) /* code set 1 */
 		{
+			MB2CHAR_NEED_AT_LEAST(len, 2);
 			*to = *from++ << 8;
 			*to |= *from++;
 			len -= 2;
@@ -474,8 +495,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
 		}
 		else if ((*from & 0xe0) == 0xc0)
 		{
-			if (len < 2)
-				break;			/* drop trailing incomplete char */
+			MB2CHAR_NEED_AT_LEAST(len, 2);
 			c1 = *from++ & 0x1f;
 			c2 = *from++ & 0x3f;
 			*to = (c1 << 6) | c2;
@@ -483,8 +503,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
 		}
 		else if ((*from & 0xf0) == 0xe0)
 		{
-			if (len < 3)
-				break;			/* drop trailing incomplete char */
+			MB2CHAR_NEED_AT_LEAST(len, 3);
 			c1 = *from++ & 0x0f;
 			c2 = *from++ & 0x3f;
 			c3 = *from++ & 0x3f;
@@ -493,8 +512,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
 		}
 		else if ((*from & 0xf8) == 0xf0)
 		{
-			if (len < 4)
-				break;			/* drop trailing incomplete char */
+			MB2CHAR_NEED_AT_LEAST(len, 4);
 			c1 = *from++ & 0x07;
 			c2 = *from++ & 0x3f;
 			c3 = *from++ & 0x3f;
@@ -757,28 +775,32 @@ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
 
 	while (len > 0 && *from)
 	{
-		if (IS_LC1(*from) && len >= 2)
+		if (IS_LC1(*from))
 		{
+			MB2CHAR_NEED_AT_LEAST(len, 2);
 			*to = *from++ << 16;
 			*to |= *from++;
 			len -= 2;
 		}
-		else if (IS_LCPRV1(*from) && len >= 3)
+		else if (IS_LCPRV1(*from))
 		{
+			MB2CHAR_NEED_AT_LEAST(len, 3);
 			from++;
 			*to = *from++ << 16;
 			*to |= *from++;
 			len -= 3;
 		}
-		else if (IS_LC2(*from) && len >= 3)
+		else if (IS_LC2(*from))
 		{
+			MB2CHAR_NEED_AT_LEAST(len, 3);
 			*to = *from++ << 16;
 			*to |= *from++ << 8;
 			*to |= *from++;
 			len -= 3;
 		}
-		else if (IS_LCPRV2(*from) && len >= 4)
+		else if (IS_LCPRV2(*from))
 		{
+			MB2CHAR_NEED_AT_LEAST(len, 4);
 			from++;
 			*to = *from++ << 16;
 			*to |= *from++ << 8;
@@ -2145,7 +2167,7 @@ pg_encoding_set_invalid(int encoding, char *dst)
 const pg_wchar_tbl pg_wchar_table[] = {
 	{pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1},	/* PG_SQL_ASCII */
 	{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3},	/* PG_EUC_JP */
-	{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2},	/* PG_EUC_CN */
+	{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 3},	/* PG_EUC_CN */
 	{pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3},	/* PG_EUC_KR */
 	{pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4},	/* PG_EUC_TW */
 	{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3},	/* PG_EUC_JIS_2004 */
diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h
index 0ea7b3cda81..6a7ae2abea9 100644
--- a/src/include/catalog/dependency.h
+++ b/src/include/catalog/dependency.h
@@ -245,6 +245,8 @@ extern long changeDependenciesOn(Oid refClassId, Oid oldRefObjectId,
 extern Oid	getExtensionOfObject(Oid classId, Oid objectId);
 extern List *getAutoExtensionsOfObject(Oid classId, Oid objectId);
 
+extern Oid	getExtensionType(Oid extensionOid, const char *typname);
+
 extern bool sequenceIsOwned(Oid seqId, char deptype, Oid *tableId, int32 *colId);
 extern List *getOwnedSequences(Oid relid);
 extern Oid	getIdentitySequence(Oid relid, AttrNumber attnum, bool missing_ok);
diff --git a/src/include/commands/extension.h b/src/include/commands/extension.h
index 042ae6ba70d..f2e45cf59ea 100644
--- a/src/include/commands/extension.h
+++ b/src/include/commands/extension.h
@@ -50,6 +50,8 @@ extern char *get_extension_name(Oid ext_oid);
 extern Oid	get_extension_schema(Oid ext_oid);
 extern bool extension_file_exists(const char *extensionName);
 
+extern Oid	get_function_sibling_type(Oid funcoid, const char *typname);
+
 extern ObjectAddress AlterExtensionNamespace(const char *extensionName, const char *newschema,
 											 Oid *oldschema);
 
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index c2cc2ad0963..cd9027a444a 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -608,7 +608,14 @@ extern int	pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2);
 extern int	pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n);
 extern int	pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n);
 extern size_t pg_wchar_strlen(const pg_wchar *str);
+extern int	pg_mblen_cstr(const char *mbstr);
+extern int	pg_mblen_range(const char *mbstr, const char *end);
+extern int	pg_mblen_with_len(const char *mbstr, int limit);
+extern int	pg_mblen_unbounded(const char *mbstr);
+
+/* deprecated */
 extern int	pg_mblen(const char *mbstr);
+
 extern int	pg_dsplen(const char *mbstr);
 extern int	pg_mbstrlen(const char *mbstr);
 extern int	pg_mbstrlen_with_len(const char *mbstr, int limit);
diff --git a/src/include/tsearch/ts_locale.h b/src/include/tsearch/ts_locale.h
index 58d594d4006..787ffb165d1 100644
--- a/src/include/tsearch/ts_locale.h
+++ b/src/include/tsearch/ts_locale.h
@@ -37,13 +37,37 @@ typedef struct
 /* The second argument of t_iseq() must be a plain ASCII character */
 #define t_iseq(x,c)		(TOUCHAR(x) == (unsigned char) (c))
 
-#define COPYCHAR(d,s)	memcpy(d, s, pg_mblen(s))
+/* Copy multibyte character of known byte length, return byte length. */
+static inline int
+ts_copychar_with_len(void *dest, const void *src, int length)
+{
+	memcpy(dest, src, length);
+	return length;
+}
+
+/* Copy multibyte character from null-terminated string, return byte length. */
+static inline int
+ts_copychar_cstr(void *dest, const void *src)
+{
+	return ts_copychar_with_len(dest, src, pg_mblen_cstr((const char *) src));
+}
+
+/* Historical macro for the above. */
+#define COPYCHAR ts_copychar_cstr
+
+#define GENERATE_T_ISCLASS_DECL(character_class) \
+extern int	t_is##character_class##_with_len(const char *ptr, int len); \
+extern int	t_is##character_class##_cstr(const char *ptr); \
+extern int	t_is##character_class##_unbounded(const char *ptr); \
+\
+/* deprecated */ \
+extern int	t_is##character_class(const char *ptr);
 
-extern int	t_isdigit(const char *ptr);
-extern int	t_isspace(const char *ptr);
-extern int	t_isalpha(const char *ptr);
-extern int	t_isalnum(const char *ptr);
-extern int	t_isprint(const char *ptr);
+GENERATE_T_ISCLASS_DECL(alnum);
+GENERATE_T_ISCLASS_DECL(alpha);
+GENERATE_T_ISCLASS_DECL(digit);
+GENERATE_T_ISCLASS_DECL(print);
+GENERATE_T_ISCLASS_DECL(space);
 
 extern char *lowerstr(const char *str);
 extern char *lowerstr_with_len(const char *str, int len);
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index d3dc8bae475..48db1b800a1 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -40,14 +40,12 @@ extern bool gettoken_tsvector(TSVectorParseState state,
 extern void close_tsvector_parser(TSVectorParseState state);
 
 /* phrase operator begins with '<' */
-#define ISOPERATOR(x) \
-	( pg_mblen(x) == 1 && ( *(x) == '!' ||	\
-							*(x) == '&' ||	\
-							*(x) == '|' ||	\
-							*(x) == '(' ||	\
-							*(x) == ')' ||	\
-							*(x) == '<'		\
-						  ) )
+#define ISOPERATOR(x)		(*(x) == '!' ||	\
+							 *(x) == '&' ||	\
+							 *(x) == '|' ||	\
+							 *(x) == '(' ||	\
+							 *(x) == ')' ||	\
+							 *(x) == '<')
 
 /* parse_tsquery */
 
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index cb309ede6ae..bcba170c327 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -70,6 +70,7 @@ extern int64 get_size_from_segDBs(const char *cmd);
 
 /* oid.c */
 extern oidvector *buildoidvector(const Oid *oids, int n);
+extern void check_valid_oidvector(const oidvector *oidArray);
 extern Oid	oidparse(Node *node);
 
 /* pseudotypes.c */
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index b0658a94bd9..e790dfe2af5 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -132,9 +132,12 @@ enum SysCacheIdentifier
 	MVTABLESMVRELOID,
 	USERMAPPINGOID,
 	USERMAPPINGUSERSERVER,
-	ATTENCODINGNUM
+	ATTENCODINGNUM,
 
-#define SysCacheSize (ATTENCODINGNUM + 1)
+	/* intentionally out of alphabetical order, to avoid an ABI break: */
+	EXTENSIONOID
+
+#define SysCacheSize (EXTENSIONOID + 1)
 };
 
 extern void InitCatalogCache(void);
diff --git a/src/test/modules/test_regex/test_regex.c b/src/test/modules/test_regex/test_regex.c
index d1dd48a993b..3a470dbae44 100644
--- a/src/test/modules/test_regex/test_regex.c
+++ b/src/test/modules/test_regex/test_regex.c
@@ -414,7 +414,8 @@ parse_test_flags(test_re_flags *flags, text *opts)
 					ereport(ERROR,
 							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 							 errmsg("invalid regular expression test option: \"%.*s\"",
-									pg_mblen(opt_p + i), opt_p + i)));
+									pg_mblen_range(opt_p + i, opt_p + opt_len),
+									opt_p + i)));
 					break;
 			}
 		}
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 20b401a9ef9..bf69c337ec2 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -1601,6 +1601,11 @@ select '[0:1]={1.1,2.2}'::float8[];
 (1 row)
 
 -- all of the above should be accepted
+-- some day we might allow these cases, but for now they're errors:
+select array[]::oidvector;
+ERROR:  array is not a valid oidvector
+select array[]::int2vector;
+ERROR:  array is not a valid int2vector
 -- tests for array aggregates
 CREATE TEMP TABLE arraggtest ( f1 INT[], f2 TEXT[][], f3 FLOAT[]) DISTRIBUTED RANDOMLY;
 INSERT INTO arraggtest (f1, f2, f3) VALUES
diff --git a/src/test/regress/expected/encoding.out b/src/test/regress/expected/encoding.out
new file mode 100644
index 00000000000..ea1f38cff41
--- /dev/null
+++ b/src/test/regress/expected/encoding.out
@@ -0,0 +1,401 @@
+/* skip test if not UTF8 server encoding */
+SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+\getenv libdir PG_LIBDIR
+\getenv dlsuffix PG_DLSUFFIX
+\set regresslib :libdir '/regress' :dlsuffix
+CREATE FUNCTION test_bytea_to_text(bytea) RETURNS text
+    AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_text_to_bytea(text) RETURNS bytea
+    AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_mblen_func(text, text, text, int) RETURNS int
+    AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_text_to_wchars(text, text) RETURNS int[]
+    AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_wchars_to_text(text, int[]) RETURNS text
+    AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_valid_server_encoding(text) RETURNS boolean
+    AS :'regresslib' LANGUAGE C STRICT;
+CREATE TABLE regress_encoding(good text, truncated text, with_nul text, truncated_with_nul text);
+INSERT INTO regress_encoding
+VALUES ('café',
+        'caf' || test_bytea_to_text('\xc3'),
+        'café' || test_bytea_to_text('\x00') || 'dcba',
+        'caf' || test_bytea_to_text('\xc300') || 'dcba');
+SELECT good, truncated, with_nul FROM regress_encoding;
+ good | truncated | with_nul 
+------+-----------+----------
+ café | caf       | café
+(1 row)
+
+SELECT length(good) FROM regress_encoding;
+ length 
+--------
+      4
+(1 row)
+
+SELECT substring(good, 3, 1) FROM regress_encoding;
+ substring 
+-----------
+ f
+(1 row)
+
+SELECT substring(good, 4, 1) FROM regress_encoding;
+ substring 
+-----------
+ é
+(1 row)
+
+SELECT regexp_replace(good, '^caf(.)$', '\1') FROM regress_encoding;
+ regexp_replace 
+----------------
+ é
+(1 row)
+
+SELECT reverse(good) FROM regress_encoding;
+ reverse 
+---------
+ éfac
+(1 row)
+
+-- invalid short mb character = error
+SELECT length(truncated) FROM regress_encoding;
+ERROR:  invalid byte sequence for encoding "UTF8": 0xc3
+SELECT substring(truncated, 1, 1) FROM regress_encoding;
+ERROR:  invalid byte sequence for encoding "UTF8": 0xc3
+SELECT reverse(truncated) FROM regress_encoding;
+ERROR:  invalid byte sequence for encoding "UTF8": 0xc3
+-- invalid short mb character = silently dropped
+SELECT regexp_replace(truncated, '^caf(.)$', '\1') FROM regress_encoding;
+ regexp_replace 
+----------------
+ caf
+(1 row)
+
+-- PostgreSQL doesn't allow strings to contain NUL.  If a corrupted string
+-- contains NUL at a character boundary position, some functions treat it as a
+-- character while others treat it as a terminator, as implementation details.
+-- NUL = terminator
+SELECT length(with_nul) FROM regress_encoding;
+ length 
+--------
+      4
+(1 row)
+
+SELECT substring(with_nul, 3, 1) FROM regress_encoding;
+ substring 
+-----------
+ f
+(1 row)
+
+SELECT substring(with_nul, 4, 1) FROM regress_encoding;
+ substring 
+-----------
+ é
+(1 row)
+
+SELECT substring(with_nul, 5, 1) FROM regress_encoding;
+ substring 
+-----------
+ 
+(1 row)
+
+SELECT convert_to(substring(with_nul, 5, 1), 'UTF8') FROM regress_encoding;
+ convert_to 
+------------
+ \x
+(1 row)
+
+SELECT regexp_replace(with_nul, '^caf(.)$', '\1') FROM regress_encoding;
+ regexp_replace 
+----------------
+ é
+(1 row)
+
+-- NUL = character
+SELECT with_nul, reverse(with_nul), reverse(reverse(with_nul)) FROM regress_encoding;
+ with_nul | reverse | reverse 
+----------+---------+---------
+ café     | abcd    | café
+(1 row)
+
+-- If a corrupted string contains NUL in the tail bytes of a multibyte
+-- character (invalid in all encodings), it is considered part of the
+-- character for length purposes.  An error will only be raised in code paths
+-- that convert or verify encodings.
+SELECT length(truncated_with_nul) FROM regress_encoding;
+ length 
+--------
+      8
+(1 row)
+
+SELECT substring(truncated_with_nul, 3, 1) FROM regress_encoding;
+ substring 
+-----------
+ f
+(1 row)
+
+SELECT substring(truncated_with_nul, 4, 1) FROM regress_encoding;
+ substring 
+-----------
+ 
+(1 row)
+
+SELECT convert_to(substring(truncated_with_nul, 4, 1), 'UTF8') FROM regress_encoding;
+ERROR:  invalid byte sequence for encoding "UTF8": 0xc3 0x00
+SELECT substring(truncated_with_nul, 5, 1) FROM regress_encoding;
+ substring 
+-----------
+ d
+(1 row)
+
+SELECT regexp_replace(truncated_with_nul, '^caf(.)dcba$', '\1') = test_bytea_to_text('\xc300') FROM regress_encoding;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT reverse(truncated_with_nul) FROM regress_encoding;
+ reverse 
+---------
+ abcd
+(1 row)
+
+-- unbounded: sequence would overrun the string!
+SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated, 3)
+FROM regress_encoding;
+ test_mblen_func 
+-----------------
+               2
+(1 row)
+
+-- condition detected when using the length/range variants
+SELECT test_mblen_func('pg_mblen_with_len', 'UTF8', truncated, 3)
+FROM regress_encoding;
+ERROR:  invalid byte sequence for encoding "UTF8": 0xc3
+SELECT test_mblen_func('pg_mblen_range', 'UTF8', truncated, 3)
+FROM regress_encoding;
+ERROR:  invalid byte sequence for encoding "UTF8": 0xc3
+-- unbounded: sequence would overrun the string, if the terminator were really
+-- the end of it
+SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated_with_nul, 3)
+FROM regress_encoding;
+ test_mblen_func 
+-----------------
+               2
+(1 row)
+
+SELECT test_mblen_func('pg_encoding_mblen', 'GB18030', truncated_with_nul, 3)
+FROM regress_encoding;
+ test_mblen_func 
+-----------------
+               2
+(1 row)
+
+-- condition detected when using the cstr variants
+SELECT test_mblen_func('pg_mblen_cstr', 'UTF8', truncated_with_nul, 3)
+FROM regress_encoding;
+ERROR:  invalid byte sequence for encoding "UTF8": 0xc3
+DROP TABLE regress_encoding;
+-- mb<->wchar conversions
+CREATE FUNCTION test_encoding(encoding text, description text, input bytea)
+RETURNS VOID LANGUAGE plpgsql AS
+$$
+DECLARE
+	prefix text;
+	len int;
+	wchars int[];
+	round_trip bytea;
+	result text;
+BEGIN
+	prefix := rpad(encoding || ' ' || description || ':', 28);
+
+	-- XXX could also test validation, length functions and include client
+	-- only encodings with these test cases
+
+	IF test_valid_server_encoding(encoding) THEN
+		wchars := test_text_to_wchars(encoding, test_bytea_to_text(input));
+		round_trip = test_text_to_bytea(test_wchars_to_text(encoding, wchars));
+		if input = round_trip then
+			result := 'OK';
+		elsif length(input) > length(round_trip) and round_trip = substr(input, 1, length(round_trip)) then
+			result := 'truncated';
+		else
+			result := 'failed';
+		end if;
+		RAISE NOTICE '% % -> % -> % = %', prefix, input, wchars, round_trip, result;
+	END IF;
+END;
+$$;
+-- No validation is done on the encoding itself, just the length to avoid
+-- overruns, so some of the byte sequences below are bogus.  They cover
+-- all code branches, server encodings only for now.
+CREATE TABLE encoding_tests (encoding text, description text, input bytea);
+INSERT INTO encoding_tests VALUES
+	-- LATIN1, other single-byte encodings
+	('LATIN1', 'ASCII',    'a'),
+	('LATIN1', 'extended', '\xe9'),
+	-- EUC_JP, EUC_JIS_2004, EUR_KR (for the purposes of wchar conversion):
+	-- 2 8e (CS2, not used by EUR_KR but arbitrarily considered to have EUC_JP length)
+	-- 3 8f (CS3, not used by EUR_KR but arbitrarily considered to have EUC_JP length)
+	-- 2 80..ff (CS1)
+	('EUC_JP', 'ASCII',      'a'),
+	('EUC_JP', 'CS1, short', '\x80'),
+	('EUC_JP', 'CS1',        '\x8002'),
+	('EUC_JP', 'CS2, short', '\x8e'),
+	('EUC_JP', 'CS2',        '\x8e02'),
+	('EUC_JP', 'CS3, short', '\x8f'),
+	('EUC_JP', 'CS3, short', '\x8f02'),
+	('EUC_JP', 'CS3',        '\x8f0203'),
+	-- EUC_CN
+	-- 3 8e (CS2, not used but arbitrarily considered to have length 3)
+	-- 3 8f (CS3, not used but arbitrarily considered to have length 3)
+	-- 2 80..ff (CS1)
+	('EUC_CN', 'ASCII',      'a'),
+	('EUC_CN', 'CS1, short', '\x80'),
+	('EUC_CN', 'CS1',        '\x8002'),
+	('EUC_CN', 'CS2, short', '\x8e'),
+	('EUC_CN', 'CS2, short', '\x8e02'),
+	('EUC_CN', 'CS2',        '\x8e0203'),
+	('EUC_CN', 'CS3, short', '\x8f'),
+	('EUC_CN', 'CS3, short', '\x8f02'),
+	('EUC_CN', 'CS3',        '\x8f0203'),
+	-- EUC_TW:
+	-- 4 8e (CS2)
+	-- 3 8f (CS3, not used but arbitrarily considered to have length 3)
+	-- 2 80..ff (CS1)
+	('EUC_TW', 'ASCII',      'a'),
+	('EUC_TW', 'CS1, short', '\x80'),
+	('EUC_TW', 'CS1',        '\x8002'),
+	('EUC_TW', 'CS2, short', '\x8e'),
+	('EUC_TW', 'CS2, short', '\x8e02'),
+	('EUC_TW', 'CS2, short', '\x8e0203'),
+	('EUC_TW', 'CS2',        '\x8e020304'),
+	('EUC_TW', 'CS3, short', '\x8f'),
+	('EUC_TW', 'CS3, short', '\x8f02'),
+	('EUC_TW', 'CS3',        '\x8f0203'),
+	-- UTF8
+	-- 2 c0..df
+	-- 3 e0..ef
+	-- 4 f0..f7 (but maximum real codepoint U+10ffff has f4)
+	-- 5 f8..fb (not supported)
+	-- 6 fc..fd (not supported)
+	('UTF8',   'ASCII',               'a'),
+	('UTF8',   '2 byte, short',       '\xdf'),
+	('UTF8',   '2 byte',              '\xdf82'),
+	('UTF8',   '3 byte, short',       '\xef'),
+	('UTF8',   '3 byte, short',       '\xef82'),
+	('UTF8',   '3 byte',              '\xef8283'),
+	('UTF8',   '4 byte, short',       '\xf7'),
+	('UTF8',   '4 byte, short',       '\xf782'),
+	('UTF8',   '4 byte, short',       '\xf78283'),
+	('UTF8',   '4 byte',              '\xf7828384'),
+	('UTF8',   '5 byte, unsupported', '\xfb'),
+	('UTF8',   '5 byte, unsupported', '\xfb82'),
+	('UTF8',   '5 byte, unsupported', '\xfb8283'),
+	('UTF8',   '5 byte, unsupported', '\xfb828384'),
+	('UTF8',   '5 byte, unsupported', '\xfb82838485'),
+	('UTF8',   '6 byte, unsupported', '\xfd'),
+	('UTF8',   '6 byte, unsupported', '\xfd82'),
+	('UTF8',   '6 byte, unsupported', '\xfd8283'),
+	('UTF8',   '6 byte, unsupported', '\xfd828384'),
+	('UTF8',   '6 byte, unsupported', '\xfd82838485'),
+	('UTF8',   '6 byte, unsupported', '\xfd8283848586'),
+	-- MULE_INTERNAL
+	-- 2 81..8d LC1
+	-- 3 90..99 LC2
+	('MULE_INTERNAL', 'ASCII',         'a'),
+	('MULE_INTERNAL', 'LC1, short',    '\x81'),
+	('MULE_INTERNAL', 'LC1',           '\x8182'),
+	('MULE_INTERNAL', 'LC2, short',    '\x90'),
+	('MULE_INTERNAL', 'LC2, short',    '\x9082'),
+	('MULE_INTERNAL', 'LC2',           '\x908283');
+SELECT COUNT(test_encoding(encoding, description, input)) > 0
+FROM encoding_tests;
+NOTICE:  LATIN1 ASCII:                \x61 -> {97} -> \x61 = OK
+NOTICE:  LATIN1 extended:             \xe9 -> {233} -> \xe9 = OK
+NOTICE:  EUC_JP ASCII:                \x61 -> {97} -> \x61 = OK
+NOTICE:  EUC_JP CS1, short:           \x80 -> {} -> \x = truncated
+NOTICE:  EUC_JP CS1:                  \x8002 -> {32770} -> \x8002 = OK
+NOTICE:  EUC_JP CS2, short:           \x8e -> {} -> \x = truncated
+NOTICE:  EUC_JP CS2:                  \x8e02 -> {36354} -> \x8e02 = OK
+NOTICE:  EUC_JP CS3, short:           \x8f -> {} -> \x = truncated
+NOTICE:  EUC_JP CS3, short:           \x8f02 -> {} -> \x = truncated
+NOTICE:  EUC_JP CS3:                  \x8f0203 -> {9372163} -> \x8f0203 = OK
+NOTICE:  EUC_CN ASCII:                \x61 -> {97} -> \x61 = OK
+NOTICE:  EUC_CN CS1, short:           \x80 -> {} -> \x = truncated
+NOTICE:  EUC_CN CS1:                  \x8002 -> {32770} -> \x8002 = OK
+NOTICE:  EUC_CN CS2, short:           \x8e -> {} -> \x = truncated
+NOTICE:  EUC_CN CS2, short:           \x8e02 -> {} -> \x = truncated
+NOTICE:  EUC_CN CS2:                  \x8e0203 -> {9306627} -> \x8e0203 = OK
+NOTICE:  EUC_CN CS3, short:           \x8f -> {} -> \x = truncated
+NOTICE:  EUC_CN CS3, short:           \x8f02 -> {} -> \x = truncated
+NOTICE:  EUC_CN CS3:                  \x8f0203 -> {9372163} -> \x8f0203 = OK
+NOTICE:  EUC_TW ASCII:                \x61 -> {97} -> \x61 = OK
+NOTICE:  EUC_TW CS1, short:           \x80 -> {} -> \x = truncated
+NOTICE:  EUC_TW CS1:                  \x8002 -> {32770} -> \x8002 = OK
+NOTICE:  EUC_TW CS2, short:           \x8e -> {} -> \x = truncated
+NOTICE:  EUC_TW CS2, short:           \x8e02 -> {} -> \x = truncated
+NOTICE:  EUC_TW CS2, short:           \x8e0203 -> {} -> \x = truncated
+NOTICE:  EUC_TW CS2:                  \x8e020304 -> {-1912470780} -> \x8e020304 = OK
+NOTICE:  EUC_TW CS3, short:           \x8f -> {} -> \x = truncated
+NOTICE:  EUC_TW CS3, short:           \x8f02 -> {} -> \x = truncated
+NOTICE:  EUC_TW CS3:                  \x8f0203 -> {9372163} -> \x8f0203 = OK
+NOTICE:  UTF8 ASCII:                  \x61 -> {97} -> \x61 = OK
+NOTICE:  UTF8 2 byte, short:          \xdf -> {} -> \x = truncated
+NOTICE:  UTF8 2 byte:                 \xdf82 -> {1986} -> \xdf82 = OK
+NOTICE:  UTF8 3 byte, short:          \xef -> {} -> \x = truncated
+NOTICE:  UTF8 3 byte, short:          \xef82 -> {} -> \x = truncated
+NOTICE:  UTF8 3 byte:                 \xef8283 -> {61571} -> \xef8283 = OK
+NOTICE:  UTF8 4 byte, short:          \xf7 -> {} -> \x = truncated
+NOTICE:  UTF8 4 byte, short:          \xf782 -> {} -> \x = truncated
+NOTICE:  UTF8 4 byte, short:          \xf78283 -> {} -> \x = truncated
+NOTICE:  UTF8 4 byte:                 \xf7828384 -> {1843396} -> \xf7828384 = OK
+NOTICE:  UTF8 5 byte, unsupported:    \xfb -> {251} -> \xc3bb = failed
+NOTICE:  UTF8 5 byte, unsupported:    \xfb82 -> {251,130} -> \xc3bbc282 = failed
+NOTICE:  UTF8 5 byte, unsupported:    \xfb8283 -> {251,130,131} -> \xc3bbc282c283 = failed
+NOTICE:  UTF8 5 byte, unsupported:    \xfb828384 -> {251,130,131,132} -> \xc3bbc282c283c284 = failed
+NOTICE:  UTF8 5 byte, unsupported:    \xfb82838485 -> {251,130,131,132,133} -> \xc3bbc282c283c284c285 = failed
+NOTICE:  UTF8 6 byte, unsupported:    \xfd -> {253} -> \xc3bd = failed
+NOTICE:  UTF8 6 byte, unsupported:    \xfd82 -> {253,130} -> \xc3bdc282 = failed
+NOTICE:  UTF8 6 byte, unsupported:    \xfd8283 -> {253,130,131} -> \xc3bdc282c283 = failed
+NOTICE:  UTF8 6 byte, unsupported:    \xfd828384 -> {253,130,131,132} -> \xc3bdc282c283c284 = failed
+NOTICE:  UTF8 6 byte, unsupported:    \xfd82838485 -> {253,130,131,132,133} -> \xc3bdc282c283c284c285 = failed
+NOTICE:  UTF8 6 byte, unsupported:    \xfd8283848586 -> {253,130,131,132,133,134} -> \xc3bdc282c283c284c285c286 = failed
+NOTICE:  MULE_INTERNAL ASCII:         \x61 -> {97} -> \x61 = OK
+NOTICE:  MULE_INTERNAL LC1, short:    \x81 -> {} -> \x = truncated
+NOTICE:  MULE_INTERNAL LC1:           \x8182 -> {8454274} -> \x8182 = OK
+NOTICE:  MULE_INTERNAL LC2, short:    \x90 -> {} -> \x = truncated
+NOTICE:  MULE_INTERNAL LC2, short:    \x9082 -> {} -> \x = truncated
+NOTICE:  MULE_INTERNAL LC2:           \x908283 -> {9470595} -> \x908283 = OK
+ ?column? 
+----------
+ t
+(1 row)
+
+DROP TABLE encoding_tests;
+DROP FUNCTION test_encoding;
+DROP FUNCTION test_text_to_wchars;
+DROP FUNCTION test_mblen_func;
+DROP FUNCTION test_bytea_to_text;
+DROP FUNCTION test_text_to_bytea;
+-- substring slow path: multi-byte escape char vs. multi-byte pattern char.
+SELECT SUBSTRING('a' SIMILAR U&'\00AC' ESCAPE U&'\00A7');
+ substring 
+-----------
+ 
+(1 row)
+
+-- Levenshtein distance metric: exercise character length cache.
+SELECT U&"real\00A7_name" FROM (select 1) AS x(real_name);
+ERROR:  column "real§_name" does not exist
+LINE 1: SELECT U&"real\00A7_name" FROM (select 1) AS x(real_name);
+               ^
+HINT:  Perhaps you meant to reference the column "x.real_name".
+-- JSON errcontext: truncate long data.
+SELECT repeat(U&'\00A7', 30)::json;
+ERROR:  invalid input syntax for type json
+DETAIL:  Token "§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§" is invalid.
+CONTEXT:  JSON data, line 1: ...§§§§§§§§§§§§§§§§§§§§§§§§
diff --git a/src/test/regress/expected/encoding_1.out b/src/test/regress/expected/encoding_1.out
new file mode 100644
index 00000000000..a5b02090901
--- /dev/null
+++ b/src/test/regress/expected/encoding_1.out
@@ -0,0 +1,4 @@
+/* skip test if not UTF8 server encoding */
+SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
+\if :skip_test
+\quit
diff --git a/src/test/regress/expected/euc_kr.out b/src/test/regress/expected/euc_kr.out
new file mode 100644
index 00000000000..7a61c89a43a
--- /dev/null
+++ b/src/test/regress/expected/euc_kr.out
@@ -0,0 +1,16 @@
+-- This test is about EUC_KR encoding, chosen as perhaps the most prevalent
+-- non-UTF8, multibyte encoding as of 2026-01.  Since UTF8 can represent all
+-- of EUC_KR, also run the test in UTF8.
+SELECT getdatabaseencoding() NOT IN ('EUC_KR', 'UTF8') AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+-- Exercise is_multibyte_char_in_char (non-UTF8) slow path.
+SELECT POSITION(
+	convert_from('\xbcf6c7d0', 'EUC_KR') IN
+	convert_from('\xb0fac7d02c20bcf6c7d02c20b1e2bcfa2c20bbee', 'EUC_KR'));
+ position 
+----------
+        5
+(1 row)
+
diff --git a/src/test/regress/expected/euc_kr_1.out b/src/test/regress/expected/euc_kr_1.out
new file mode 100644
index 00000000000..faaac5d6355
--- /dev/null
+++ b/src/test/regress/expected/euc_kr_1.out
@@ -0,0 +1,6 @@
+-- This test is about EUC_KR encoding, chosen as perhaps the most prevalent
+-- non-UTF8, multibyte encoding as of 2026-01.  Since UTF8 can represent all
+-- of EUC_KR, also run the test in UTF8.
+SELECT getdatabaseencoding() NOT IN ('EUC_KR', 'UTF8') AS skip_test \gset
+\if :skip_test
+\quit
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index bc5918a22b8..c4b5a58713c 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -32,7 +32,7 @@ test: strings md5 numerology point lseg line box path polygon circle date time t
 # geometry depends on point, lseg, line, box, path, polygon, circle
 # horology depends on date, time, timetz, timestamp, timestamptz, interval
 # ----------
-test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc database
+test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc database encoding euc_kr
 
 # ----------
 # Load huge amounts of data
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 0fc787c1aaf..cd4d1df4ef0 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -1285,6 +1285,145 @@ test_enc_conversion(PG_FUNCTION_ARGS)
 	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
 }
 
+/* Relabel bytea as text with no encoding validation, for corruption tests from SQL. */
+PG_FUNCTION_INFO_V1(test_bytea_to_text);
+Datum
+test_bytea_to_text(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_TEXT_P(PG_GETARG_BYTEA_PP(0));	/* argument is handed back as-is */
+}
+
+/* Relabel text as bytea, so SQL tests can inspect the raw bytes of a string. */
+PG_FUNCTION_INFO_V1(test_text_to_bytea);
+Datum
+test_text_to_bytea(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_BYTEA_P(PG_GETARG_TEXT_PP(0));	/* argument is handed back as-is */
+}
+
+/* Corruption tests in C: run the mblen variant named by arg 0 at byte offset arg 3 of arg 2. */
+PG_FUNCTION_INFO_V1(test_mblen_func);
+Datum
+test_mblen_func(PG_FUNCTION_ARGS)
+{
+	const char *func = text_to_cstring(PG_GETARG_BYTEA_PP(0));
+	const char *encoding = text_to_cstring(PG_GETARG_BYTEA_PP(1));	/* only used by the pg_encoding_mblen branch */
+	text	   *string = PG_GETARG_BYTEA_PP(2);
+	int			offset = PG_GETARG_INT32(3);	/* not range-checked against size here */
+	const char *data = VARDATA_ANY(string);
+	size_t		size = VARSIZE_ANY_EXHDR(string);
+	int			result = 0;
+
+	if (strcmp(func, "pg_mblen_unbounded") == 0)
+		result = pg_mblen_unbounded(data + offset);
+	else if (strcmp(func, "pg_mblen_cstr") == 0)
+		result = pg_mblen_cstr(data + offset);
+	else if (strcmp(func, "pg_mblen_with_len") == 0)
+		result = pg_mblen_with_len(data + offset, size - offset);	/* bytes remaining after offset */
+	else if (strcmp(func, "pg_mblen_range") == 0)
+		result = pg_mblen_range(data + offset, data + size);	/* end pointer is one past the last byte */
+	else if (strcmp(func, "pg_encoding_mblen") == 0)
+		result = pg_encoding_mblen(pg_char_to_encoding(encoding), data + offset);
+	else
+		elog(ERROR, "unknown function");	/* name matched none of the variants above */
+
+	PG_RETURN_INT32(result);
+}
+
+PG_FUNCTION_INFO_V1(test_text_to_wchars);	/* (encoding name, text) -> int4[] of wchar values */
+Datum
+test_text_to_wchars(PG_FUNCTION_ARGS)
+{
+	const char *encoding_name = text_to_cstring(PG_GETARG_BYTEA_PP(0));
+	text	   *string = PG_GETARG_TEXT_PP(1);
+	const char *data = VARDATA_ANY(string);
+	size_t		size = VARSIZE_ANY_EXHDR(string);
+	pg_wchar   *wchars = palloc(sizeof(pg_wchar) * (size + 1));	/* one slot per input byte, plus one extra */
+	Datum	   *datums;
+	int			wlen;
+	int			encoding;
+
+	encoding = pg_char_to_encoding(encoding_name);
+	if (encoding < 0)
+		elog(ERROR, "unknown encoding name: %s", encoding_name);
+
+	if (size > 0)
+	{
+		datums = palloc(sizeof(Datum) * size);
+		wlen = pg_encoding_mb2wchar_with_len(encoding,
+											 data,
+											 wchars,
+											 size);
+		Assert(wlen >= 0);
+		Assert(wlen <= size);	/* so the datums array of `size` entries is big enough */
+		Assert(wchars[wlen] == 0);	/* converter is expected to zero-terminate its output */
+
+		for (int i = 0; i < wlen; ++i)
+			datums[i] = UInt32GetDatum(wchars[i]);
+	}
+	else
+	{
+		datums = NULL;	/* empty input: build a zero-element array */
+		wlen = 0;
+	}
+
+	PG_RETURN_ARRAYTYPE_P(construct_array_builtin(datums, wlen, INT4OID));
+}
+
+PG_FUNCTION_INFO_V1(test_wchars_to_text);	/* (encoding name, int4[] of wchar values) -> text */
+Datum
+test_wchars_to_text(PG_FUNCTION_ARGS)
+{
+	const char *encoding_name = text_to_cstring(PG_GETARG_BYTEA_PP(0));
+	ArrayType  *array = PG_GETARG_ARRAYTYPE_P(1);
+	Datum	   *datums;
+	bool	   *nulls;
+	char	   *mb;
+	text	   *result;
+	int			wlen;
+	int			bytes;
+	int			encoding;
+
+	encoding = pg_char_to_encoding(encoding_name);
+	if (encoding < 0)
+		elog(ERROR, "unknown encoding name: %s", encoding_name);
+
+	deconstruct_array_builtin(array, INT4OID, &datums, &nulls, &wlen);
+
+	if (wlen > 0)
+	{
+		pg_wchar   *wchars = palloc(sizeof(pg_wchar) * wlen);
+
+		for (int i = 0; i < wlen; ++i)
+		{
+			if (nulls[i])
+				elog(ERROR, "unexpected NULL in array");	/* every element must be a wchar value */
+			wchars[i] = DatumGetInt32(datums[i]);
+		}
+
+		mb = palloc(pg_encoding_max_length(encoding) * wlen + 1);	/* worst-case bytes per wchar, plus one extra byte */
+		bytes = pg_encoding_wchar2mb_with_len(encoding, wchars, mb, wlen);
+	}
+	else
+	{
+		mb = "";	/* nothing to copy below, since bytes is 0 */
+		bytes = 0;
+	}
+
+	result = palloc(bytes + VARHDRSZ);
+	SET_VARSIZE(result, bytes + VARHDRSZ);
+	memcpy(VARDATA(result), mb, bytes);	/* copies exactly `bytes` bytes; no terminator is stored */
+
+	PG_RETURN_TEXT_P(result);
+}
+
+PG_FUNCTION_INFO_V1(test_valid_server_encoding);	/* (encoding name) -> true iff the lookup yields an encoding ID rather than -1 */
+Datum
+test_valid_server_encoding(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_BOOL(pg_valid_server_encoding(text_to_cstring(PG_GETARG_TEXT_PP(0))) >= 0);
+}
+
 /* Provide SQL access to IsBinaryCoercible() */
 PG_FUNCTION_INFO_V1(binary_coercible);
 Datum
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index aabbd6f92de..075e6949846 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -502,6 +502,10 @@ select array[]::text[];
 select '[0:1]={1.1,2.2}'::float8[];
 -- all of the above should be accepted
 
+-- some day we might allow these cases, but for now they're errors:
+select array[]::oidvector;
+select array[]::int2vector;
+
 -- tests for array aggregates
 CREATE TEMP TABLE arraggtest ( f1 INT[], f2 TEXT[][], f3 FLOAT[]) DISTRIBUTED RANDOMLY;
 
diff --git a/src/test/regress/sql/encoding.sql b/src/test/regress/sql/encoding.sql
new file mode 100644
index 00000000000..b9543c0cb32
--- /dev/null
+++ b/src/test/regress/sql/encoding.sql
@@ -0,0 +1,228 @@
+/* skip test if not UTF8 server encoding */
+SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+
+\getenv libdir PG_LIBDIR
+\getenv dlsuffix PG_DLSUFFIX
+
+\set regresslib :libdir '/regress' :dlsuffix
+
+CREATE FUNCTION test_bytea_to_text(bytea) RETURNS text
+    AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_text_to_bytea(text) RETURNS bytea
+    AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_mblen_func(text, text, text, int) RETURNS int
+    AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_text_to_wchars(text, text) RETURNS int[]
+    AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_wchars_to_text(text, int[]) RETURNS text
+    AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_valid_server_encoding(text) RETURNS boolean
+    AS :'regresslib' LANGUAGE C STRICT;
+
+
+CREATE TABLE regress_encoding(good text, truncated text, with_nul text, truncated_with_nul text);
+INSERT INTO regress_encoding
+VALUES ('café',
+        'caf' || test_bytea_to_text('\xc3'),
+        'café' || test_bytea_to_text('\x00') || 'dcba',
+        'caf' || test_bytea_to_text('\xc300') || 'dcba');
+
+SELECT good, truncated, with_nul FROM regress_encoding;
+
+SELECT length(good) FROM regress_encoding;
+SELECT substring(good, 3, 1) FROM regress_encoding;
+SELECT substring(good, 4, 1) FROM regress_encoding;
+SELECT regexp_replace(good, '^caf(.)$', '\1') FROM regress_encoding;
+SELECT reverse(good) FROM regress_encoding;
+
+-- invalid short mb character = error
+SELECT length(truncated) FROM regress_encoding;
+SELECT substring(truncated, 1, 1) FROM regress_encoding;
+SELECT reverse(truncated) FROM regress_encoding;
+-- invalid short mb character = silently dropped
+SELECT regexp_replace(truncated, '^caf(.)$', '\1') FROM regress_encoding;
+
+-- PostgreSQL doesn't allow strings to contain NUL.  If a corrupted string
+-- contains NUL at a character boundary position, some functions treat it as a
+-- character while others treat it as a terminator, as implementation details.
+
+-- NUL = terminator
+SELECT length(with_nul) FROM regress_encoding;
+SELECT substring(with_nul, 3, 1) FROM regress_encoding;
+SELECT substring(with_nul, 4, 1) FROM regress_encoding;
+SELECT substring(with_nul, 5, 1) FROM regress_encoding;
+SELECT convert_to(substring(with_nul, 5, 1), 'UTF8') FROM regress_encoding;
+SELECT regexp_replace(with_nul, '^caf(.)$', '\1') FROM regress_encoding;
+-- NUL = character
+SELECT with_nul, reverse(with_nul), reverse(reverse(with_nul)) FROM regress_encoding;
+
+-- If a corrupted string contains NUL in the tail bytes of a multibyte
+-- character (invalid in all encodings), it is considered part of the
+-- character for length purposes.  An error will only be raised in code paths
+-- that convert or verify encodings.
+
+SELECT length(truncated_with_nul) FROM regress_encoding;
+SELECT substring(truncated_with_nul, 3, 1) FROM regress_encoding;
+SELECT substring(truncated_with_nul, 4, 1) FROM regress_encoding;
+SELECT convert_to(substring(truncated_with_nul, 4, 1), 'UTF8') FROM regress_encoding;
+SELECT substring(truncated_with_nul, 5, 1) FROM regress_encoding;
+SELECT regexp_replace(truncated_with_nul, '^caf(.)dcba$', '\1') = test_bytea_to_text('\xc300') FROM regress_encoding;
+SELECT reverse(truncated_with_nul) FROM regress_encoding;
+
+-- unbounded: sequence would overrun the string!
+SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated, 3)
+FROM regress_encoding;
+
+-- condition detected when using the length/range variants
+SELECT test_mblen_func('pg_mblen_with_len', 'UTF8', truncated, 3)
+FROM regress_encoding;
+SELECT test_mblen_func('pg_mblen_range', 'UTF8', truncated, 3)
+FROM regress_encoding;
+
+-- unbounded: sequence would overrun the string, if the terminator were really
+-- the end of it
+SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated_with_nul, 3)
+FROM regress_encoding;
+SELECT test_mblen_func('pg_encoding_mblen', 'GB18030', truncated_with_nul, 3)
+FROM regress_encoding;
+
+-- condition detected when using the cstr variants
+SELECT test_mblen_func('pg_mblen_cstr', 'UTF8', truncated_with_nul, 3)
+FROM regress_encoding;
+
+DROP TABLE regress_encoding;
+
+-- mb<->wchar conversions
+CREATE FUNCTION test_encoding(encoding text, description text, input bytea)
+RETURNS VOID LANGUAGE plpgsql AS
+$$
+DECLARE
+	prefix text;
+	len int;
+	wchars int[];
+	round_trip bytea;
+	result text;
+BEGIN
+	prefix := rpad(encoding || ' ' || description || ':', 28);
+
+	-- XXX could also test validation, length functions and include client
+	-- only encodings with these test cases
+
+	IF test_valid_server_encoding(encoding) THEN
+		wchars := test_text_to_wchars(encoding, test_bytea_to_text(input));
+		round_trip = test_text_to_bytea(test_wchars_to_text(encoding, wchars));
+		if input = round_trip then
+			result := 'OK';
+		elsif length(input) > length(round_trip) and round_trip = substr(input, 1, length(round_trip)) then
+			result := 'truncated';
+		else
+			result := 'failed';
+		end if;
+		RAISE NOTICE '% % -> % -> % = %', prefix, input, wchars, round_trip, result;
+	END IF;
+END;
+$$;
+-- No validation is done on the encoding itself, just the length to avoid
+-- overruns, so some of the byte sequences below are bogus.  They cover
+-- all code branches, server encodings only for now.
+CREATE TABLE encoding_tests (encoding text, description text, input bytea);
+INSERT INTO encoding_tests VALUES
+	-- LATIN1, other single-byte encodings
+	('LATIN1', 'ASCII',    'a'),
+	('LATIN1', 'extended', '\xe9'),
+	-- EUC_JP, EUC_JIS_2004, EUR_KR (for the purposes of wchar conversion):
+	-- 2 8e (CS2, not used by EUR_KR but arbitrarily considered to have EUC_JP length)
+	-- 3 8f (CS3, not used by EUR_KR but arbitrarily considered to have EUC_JP length)
+	-- 2 80..ff (CS1)
+	('EUC_JP', 'ASCII',      'a'),
+	('EUC_JP', 'CS1, short', '\x80'),
+	('EUC_JP', 'CS1',        '\x8002'),
+	('EUC_JP', 'CS2, short', '\x8e'),
+	('EUC_JP', 'CS2',        '\x8e02'),
+	('EUC_JP', 'CS3, short', '\x8f'),
+	('EUC_JP', 'CS3, short', '\x8f02'),
+	('EUC_JP', 'CS3',        '\x8f0203'),
+	-- EUC_CN
+	-- 3 8e (CS2, not used but arbitrarily considered to have length 3)
+	-- 3 8f (CS3, not used but arbitrarily considered to have length 3)
+	-- 2 80..ff (CS1)
+	('EUC_CN', 'ASCII',      'a'),
+	('EUC_CN', 'CS1, short', '\x80'),
+	('EUC_CN', 'CS1',        '\x8002'),
+	('EUC_CN', 'CS2, short', '\x8e'),
+	('EUC_CN', 'CS2, short', '\x8e02'),
+	('EUC_CN', 'CS2',        '\x8e0203'),
+	('EUC_CN', 'CS3, short', '\x8f'),
+	('EUC_CN', 'CS3, short', '\x8f02'),
+	('EUC_CN', 'CS3',        '\x8f0203'),
+	-- EUC_TW:
+	-- 4 8e (CS2)
+	-- 3 8f (CS3, not used but arbitrarily considered to have length 3)
+	-- 2 80..ff (CS1)
+	('EUC_TW', 'ASCII',      'a'),
+	('EUC_TW', 'CS1, short', '\x80'),
+	('EUC_TW', 'CS1',        '\x8002'),
+	('EUC_TW', 'CS2, short', '\x8e'),
+	('EUC_TW', 'CS2, short', '\x8e02'),
+	('EUC_TW', 'CS2, short', '\x8e0203'),
+	('EUC_TW', 'CS2',        '\x8e020304'),
+	('EUC_TW', 'CS3, short', '\x8f'),
+	('EUC_TW', 'CS3, short', '\x8f02'),
+	('EUC_TW', 'CS3',        '\x8f0203'),
+	-- UTF8
+	-- 2 c0..df
+	-- 3 e0..ef
+	-- 4 f0..f7 (but maximum real codepoint U+10ffff has f4)
+	-- 5 f8..fb (not supported)
+	-- 6 fc..fd (not supported)
+	('UTF8',   'ASCII',               'a'),
+	('UTF8',   '2 byte, short',       '\xdf'),
+	('UTF8',   '2 byte',              '\xdf82'),
+	('UTF8',   '3 byte, short',       '\xef'),
+	('UTF8',   '3 byte, short',       '\xef82'),
+	('UTF8',   '3 byte',              '\xef8283'),
+	('UTF8',   '4 byte, short',       '\xf7'),
+	('UTF8',   '4 byte, short',       '\xf782'),
+	('UTF8',   '4 byte, short',       '\xf78283'),
+	('UTF8',   '4 byte',              '\xf7828384'),
+	('UTF8',   '5 byte, unsupported', '\xfb'),
+	('UTF8',   '5 byte, unsupported', '\xfb82'),
+	('UTF8',   '5 byte, unsupported', '\xfb8283'),
+	('UTF8',   '5 byte, unsupported', '\xfb828384'),
+	('UTF8',   '5 byte, unsupported', '\xfb82838485'),
+	('UTF8',   '6 byte, unsupported', '\xfd'),
+	('UTF8',   '6 byte, unsupported', '\xfd82'),
+	('UTF8',   '6 byte, unsupported', '\xfd8283'),
+	('UTF8',   '6 byte, unsupported', '\xfd828384'),
+	('UTF8',   '6 byte, unsupported', '\xfd82838485'),
+	('UTF8',   '6 byte, unsupported', '\xfd8283848586'),
+	-- MULE_INTERNAL
+	-- 2 81..8d LC1
+	-- 3 90..99 LC2
+	('MULE_INTERNAL', 'ASCII',         'a'),
+	('MULE_INTERNAL', 'LC1, short',    '\x81'),
+	('MULE_INTERNAL', 'LC1',           '\x8182'),
+	('MULE_INTERNAL', 'LC2, short',    '\x90'),
+	('MULE_INTERNAL', 'LC2, short',    '\x9082'),
+	('MULE_INTERNAL', 'LC2',           '\x908283');
+
+SELECT COUNT(test_encoding(encoding, description, input)) > 0
+FROM encoding_tests;
+
+DROP TABLE encoding_tests;
+DROP FUNCTION test_encoding;
+DROP FUNCTION test_text_to_wchars;
+DROP FUNCTION test_mblen_func;
+DROP FUNCTION test_bytea_to_text;
+DROP FUNCTION test_text_to_bytea;
+
+
+-- substring slow path: multi-byte escape char vs. multi-byte pattern char.
+SELECT SUBSTRING('a' SIMILAR U&'\00AC' ESCAPE U&'\00A7');
+-- Levenshtein distance metric: exercise character length cache.
+SELECT U&"real\00A7_name" FROM (select 1) AS x(real_name);
+-- JSON errcontext: truncate long data.
+SELECT repeat(U&'\00A7', 30)::json;
diff --git a/src/test/regress/sql/euc_kr.sql b/src/test/regress/sql/euc_kr.sql
new file mode 100644
index 00000000000..1851b2a8c14
--- /dev/null
+++ b/src/test/regress/sql/euc_kr.sql
@@ -0,0 +1,12 @@
+-- This test is about EUC_KR encoding, chosen as perhaps the most prevalent
+-- non-UTF8, multibyte encoding as of 2026-01.  Since UTF8 can represent all
+-- of EUC_KR, also run the test in UTF8.
+SELECT getdatabaseencoding() NOT IN ('EUC_KR', 'UTF8') AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+
+-- Exercise is_multibyte_char_in_char (non-UTF8) slow path.
+SELECT POSITION(
+	convert_from('\xbcf6c7d0', 'EUC_KR') IN
+	convert_from('\xb0fac7d02c20bcf6c7d02c20b1e2bcfa2c20bbee', 'EUC_KR'));
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 97ae28337d3..0b1b1df3b4a 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -737,6 +737,7 @@ ExtensibleNodeEntry
 ExtensibleNodeMethods
 ExtensionControlFile
 ExtensionInfo
+ExtensionSiblingCache
 ExtensionVersionInfo
 FDWCollateState
 FD_SET