diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index 65d6f4073..fc68a8f5f 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -334,6 +334,19 @@ *** xref:develop:connect/cookbooks/rag.adoc[] *** xref:develop:connect/cookbooks/jira.adoc[] +* xref:sql:index.adoc[Redpanda SQL] +// ** quickstart.adoc +** xref:sql:get-started/what-is-redpanda-sql.adoc[Overview] +*** xref:sql:get-started/oltp-vs-olap.adoc[] +*** xref:sql:get-started/redpanda-sql-vs-postgresql.adoc[] +** xref:sql:connect-to-sql/index.adoc[Connect to Redpanda SQL] +*** xref:sql:connect-to-sql/language-clients/psycopg2.adoc[] +*** xref:sql:connect-to-sql/language-clients/java-jdbc.adoc[] +*** xref:sql:connect-to-sql/language-clients/php-pdo.adoc[] +*** xref:sql:connect-to-sql/language-clients/dotnet-dapper.adoc[] +** xref:sql:troubleshoot/index.adoc[Troubleshoot] +*** xref:sql:troubleshoot/degraded-state-handling.adoc[] + * xref:develop:index.adoc[Develop] ** xref:develop:kafka-clients.adoc[] ** xref:develop:topics/index.adoc[Topics] @@ -501,6 +514,225 @@ * xref:get-started:partner-integration.adoc[] * xref:reference:index.adoc[Reference] +** xref:reference:sql/index.adoc[Redpanda SQL Reference] +*** xref:reference:sql/redpanda-catalogs.adoc[] +*** xref:reference:sql/sql-statements/index.adoc[Statements] +**** xref:reference:sql/sql-statements/keywords.adoc[] +**** xref:reference:sql/sql-statements/alter-redpanda-catalog.adoc[] +**** xref:reference:sql/sql-statements/alter-storage.adoc[] +**** xref:reference:sql/sql-statements/alter-table.adoc[] +**** xref:reference:sql/sql-statements/create-redpanda-catalog.adoc[] +**** xref:reference:sql/sql-statements/create-storage.adoc[] +**** xref:reference:sql/sql-statements/create-table.adoc[] +**** xref:reference:sql/sql-statements/drop-redpanda-catalog.adoc[] +**** xref:reference:sql/sql-statements/drop-storage.adoc[] +**** xref:reference:sql/sql-statements/drop-table.adoc[] +**** xref:reference:sql/sql-statements/select.adoc[] +**** 
xref:reference:sql/sql-statements/copy-to.adoc[] +**** xref:reference:sql/sql-statements/describe.adoc[] +**** xref:reference:sql/sql-statements/set-show.adoc[] +**** xref:reference:sql/sql-statements/show-tables.adoc[] +**** xref:reference:sql/sql-statements/show-execs.adoc[] +**** xref:reference:sql/sql-statements/show-nodes.adoc[] +*** xref:reference:sql/sql-clauses/index.adoc[Clauses] +**** xref:reference:sql/sql-clauses/from/index.adoc[FROM] +***** xref:reference:sql/sql-clauses/from/from.adoc[] +***** xref:reference:sql/sql-clauses/from/join.adoc[] +***** xref:reference:sql/sql-clauses/from/left-join.adoc[] +***** xref:reference:sql/sql-clauses/from/outer-join.adoc[] +***** xref:reference:sql/sql-clauses/from/right-join.adoc[] +**** xref:reference:sql/sql-clauses/where.adoc[] +**** xref:reference:sql/sql-clauses/group-by.adoc[] +**** xref:reference:sql/sql-clauses/having.adoc[] +**** xref:reference:sql/sql-clauses/order-by.adoc[] +**** xref:reference:sql/sql-clauses/limit.adoc[] +**** xref:reference:sql/sql-clauses/offset.adoc[] +**** xref:reference:sql/sql-clauses/set-operations/index.adoc[Set Operations] +***** xref:reference:sql/sql-clauses/set-operations/except.adoc[] +***** xref:reference:sql/sql-clauses/set-operations/intersect.adoc[] +***** xref:reference:sql/sql-clauses/set-operations/union.adoc[] +**** xref:reference:sql/sql-clauses/with.adoc[] +**** xref:reference:sql/sql-clauses/over-window.adoc[] +*** xref:reference:sql/sql-data-types/index.adoc[Data Types] +**** xref:reference:sql/sql-data-types/numeric-type/index.adoc[Numeric] +***** xref:reference:sql/sql-data-types/numeric-type/numeric.adoc[] +***** xref:reference:sql/sql-data-types/numeric-type/numeric-data-type-aliases.adoc[] +**** xref:reference:sql/sql-data-types/timestamp-without-time-zone.adoc[] +**** xref:reference:sql/sql-data-types/timestamp-with-time-zone.adoc[] +**** xref:reference:sql/sql-data-types/date.adoc[] +**** xref:reference:sql/sql-data-types/time-type/index.adoc[Time] 
+***** xref:reference:sql/sql-data-types/time-type/time.adoc[] +***** xref:reference:sql/sql-data-types/time-type/time-operators.adoc[] +**** xref:reference:sql/sql-data-types/interval.adoc[] +**** xref:reference:sql/sql-data-types/bool.adoc[] +**** xref:reference:sql/sql-data-types/text.adoc[] +**** xref:reference:sql/sql-data-types/json.adoc[] +**** xref:reference:sql/sql-data-types/array.adoc[] +**** xref:reference:sql/sql-data-types/row.adoc[] +**** xref:reference:sql/sql-data-types/geometry.adoc[] +**** xref:reference:sql/sql-data-types/geography.adoc[] +*** xref:reference:sql/sql-functions/index.adoc[Functions] +**** xref:reference:sql/sql-functions/boolean-functions/index.adoc[Boolean] +***** xref:reference:sql/sql-functions/boolean-functions/if-function.adoc[] +***** xref:reference:sql/sql-functions/boolean-functions/is-distinct-from-operator.adoc[] +***** xref:reference:sql/sql-functions/boolean-functions/is-not-distinct-from-operator.adoc[] +**** xref:reference:sql/sql-functions/math-functions/index.adoc[Math] +***** xref:reference:sql/sql-functions/math-functions/abs.adoc[] +***** xref:reference:sql/sql-functions/math-functions/cbrt.adoc[] +***** xref:reference:sql/sql-functions/math-functions/ceil.adoc[] +***** xref:reference:sql/sql-functions/math-functions/exp.adoc[] +***** xref:reference:sql/sql-functions/math-functions/floor.adoc[] +***** xref:reference:sql/sql-functions/math-functions/greatest.adoc[] +***** xref:reference:sql/sql-functions/math-functions/least.adoc[] +***** xref:reference:sql/sql-functions/math-functions/ln.adoc[] +***** xref:reference:sql/sql-functions/math-functions/log.adoc[] +***** xref:reference:sql/sql-functions/math-functions/power.adoc[] +***** xref:reference:sql/sql-functions/math-functions/random.adoc[] +***** xref:reference:sql/sql-functions/math-functions/round.adoc[] +***** xref:reference:sql/sql-functions/math-functions/sign.adoc[] +***** xref:reference:sql/sql-functions/math-functions/sin.adoc[] +***** 
xref:reference:sql/sql-functions/math-functions/sinh.adoc[] +***** xref:reference:sql/sql-functions/math-functions/cosh.adoc[] +***** xref:reference:sql/sql-functions/math-functions/sqrt.adoc[] +***** xref:reference:sql/sql-functions/math-functions/to-char-from-number.adoc[] +**** xref:reference:sql/sql-functions/string-functions/index.adoc[String] +***** xref:reference:sql/sql-functions/string-functions/concat.adoc[] +***** xref:reference:sql/sql-functions/string-functions/ends-with.adoc[] +***** xref:reference:sql/sql-functions/string-functions/length.adoc[] +***** xref:reference:sql/sql-functions/string-functions/lower.adoc[] +***** xref:reference:sql/sql-functions/string-functions/position.adoc[] +***** xref:reference:sql/sql-functions/string-functions/replace.adoc[] +***** xref:reference:sql/sql-functions/string-functions/starts-with.adoc[] +***** xref:reference:sql/sql-functions/string-functions/strpos.adoc[] +***** xref:reference:sql/sql-functions/string-functions/substr.adoc[] +***** xref:reference:sql/sql-functions/string-functions/substring.adoc[] +***** xref:reference:sql/sql-functions/string-functions/upper.adoc[] +***** xref:reference:sql/sql-functions/string-functions/regex/index.adoc[Regex] +****** xref:reference:sql/sql-functions/string-functions/regex/regexp-replace.adoc[] +****** xref:reference:sql/sql-functions/string-functions/regex/regexp-match.adoc[] +****** xref:reference:sql/sql-functions/string-functions/regex/posix-regular-expressions.adoc[] +**** xref:reference:sql/sql-functions/timestamp-functions/index.adoc[Timestamp] +***** xref:reference:sql/sql-functions/timestamp-functions/current-timestamp.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/date-trunc.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/extract.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/format-timestamp.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/timestamp-micros.adoc[] +***** 
xref:reference:sql/sql-functions/timestamp-functions/timestamp-millis.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/timestamp-seconds.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/timestamp-trunc.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/to-char.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/to-timestamp.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/unix-micros.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/unix-millis.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/unix-seconds.adoc[] +**** xref:reference:sql/sql-functions/json-functions/index.adoc[JSON] +***** xref:reference:sql/sql-functions/json-functions/json-array-extract.adoc[] +***** xref:reference:sql/sql-functions/json-functions/json-array-length.adoc[] +***** xref:reference:sql/sql-functions/json-functions/json-extract-path.adoc[] +***** xref:reference:sql/sql-functions/json-functions/json-extract-path-text.adoc[] +**** xref:reference:sql/sql-functions/aggregate-functions/index.adoc[Aggregate] +***** xref:reference:sql/sql-functions/aggregate-functions/avg.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/bool-and.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/bool-or.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/count.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/distinct.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/for-max.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/for-min.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/max.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/min.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/sum.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/index.adoc[Ordered-Set] +****** 
xref:reference:sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/mode.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-cont.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-disc.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/statistics/index.adoc[Statistics] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/corr.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/covar-pop.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/covar-samp.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-avgx.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-avgy.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-count.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-intercept.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-r2.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-slope.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-sxx.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-sxy.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-syy.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/stddev.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/stddev-pop.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/stddev-samp.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/variance.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/var-pop.adoc[] +****** 
xref:reference:sql/sql-functions/aggregate-functions/statistics/var-samp.adoc[] +**** xref:reference:sql/sql-functions/window-functions/index.adoc[Window] +***** xref:reference:sql/sql-functions/window-functions/avg.adoc[] +***** xref:reference:sql/sql-functions/window-functions/bool-and.adoc[] +***** xref:reference:sql/sql-functions/window-functions/bool-or.adoc[] +***** xref:reference:sql/sql-functions/window-functions/count.adoc[] +***** xref:reference:sql/sql-functions/window-functions/cume-dist.adoc[] +***** xref:reference:sql/sql-functions/window-functions/dense-rank.adoc[] +***** xref:reference:sql/sql-functions/window-functions/first-value.adoc[] +***** xref:reference:sql/sql-functions/window-functions/lag.adoc[] +***** xref:reference:sql/sql-functions/window-functions/last-value.adoc[] +***** xref:reference:sql/sql-functions/window-functions/lead.adoc[] +***** xref:reference:sql/sql-functions/window-functions/max.adoc[] +***** xref:reference:sql/sql-functions/window-functions/min.adoc[] +***** xref:reference:sql/sql-functions/window-functions/nth-value.adoc[] +***** xref:reference:sql/sql-functions/window-functions/ntile.adoc[] +***** xref:reference:sql/sql-functions/window-functions/percent-rank.adoc[] +***** xref:reference:sql/sql-functions/window-functions/rank.adoc[] +***** xref:reference:sql/sql-functions/window-functions/row-number.adoc[] +***** xref:reference:sql/sql-functions/window-functions/sum.adoc[] +**** xref:reference:sql/sql-functions/other-functions/index.adoc[Other] +***** xref:reference:sql/sql-functions/other-functions/coalesce.adoc[] +***** xref:reference:sql/sql-functions/other-functions/col-description.adoc[] +***** xref:reference:sql/sql-functions/other-functions/current-database.adoc[] +***** xref:reference:sql/sql-functions/other-functions/current-schema.adoc[] +***** xref:reference:sql/sql-functions/other-functions/has-schema-privilege.adoc[] +***** xref:reference:sql/sql-functions/other-functions/nullif.adoc[] +***** 
xref:reference:sql/sql-functions/other-functions/obj-description.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-backend-pid.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-encoding-to-char.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-get-constraintdef.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-get-expr.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-get-indexdef.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-get-statisticsobjdef-columns.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-get-userbyid.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-relation-is-publishable.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-size-pretty.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-table-is-visible.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-table-size.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-total-relation-size.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-typeof.adoc[] +***** xref:reference:sql/sql-functions/other-functions/shobj-description.adoc[] +***** xref:reference:sql/sql-functions/other-functions/generate-series.adoc[] +*** xref:reference:sql/sql-operators/index.adoc[Operators] +**** xref:reference:sql/sql-operators/bitwise-shift-left.adoc[] +**** xref:reference:sql/sql-operators/bitwise-shift-right.adoc[] +*** xref:reference:sql/schema.adoc[] +*** xref:reference:sql/comment-support.adoc[] +*** xref:reference:sql/transactions.adoc[] +*** xref:reference:sql/system-virtual-tables.adoc[] +*** xref:reference:sql/system-catalogs/index.adoc[System Catalogs] +**** xref:reference:sql/system-catalogs/catalogs/pg_attrdef.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_attribute.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_authid.adoc[] +**** 
xref:reference:sql/system-catalogs/catalogs/pg_class.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_constraint.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_database.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_depend.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_description.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_index.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_language.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_namespace.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_proc.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_roles.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_settings.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_shadow.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_statio_user_tables.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_type.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_user.adoc[] + ** xref:reference:tiers/index.adoc[Cloud Tiers and Regions] *** xref:reference:tiers/serverless-regions.adoc[] *** xref:reference:tiers/byoc-tiers.adoc[] diff --git a/modules/reference/images/sql/join-venn.png b/modules/reference/images/sql/join-venn.png new file mode 100644 index 000000000..58d544500 Binary files /dev/null and b/modules/reference/images/sql/join-venn.png differ diff --git a/modules/reference/images/sql/left-join-venn.png b/modules/reference/images/sql/left-join-venn.png new file mode 100644 index 000000000..6fa338d49 Binary files /dev/null and b/modules/reference/images/sql/left-join-venn.png differ diff --git a/modules/reference/images/sql/outer-join-venn.png b/modules/reference/images/sql/outer-join-venn.png new file mode 100644 index 000000000..4f6cee4de Binary files /dev/null and b/modules/reference/images/sql/outer-join-venn.png differ diff --git a/modules/reference/images/sql/outer-join-where-venn.png 
b/modules/reference/images/sql/outer-join-where-venn.png new file mode 100644 index 000000000..18dfdd1e6 Binary files /dev/null and b/modules/reference/images/sql/outer-join-where-venn.png differ diff --git a/modules/reference/images/sql/right-join-venn.png b/modules/reference/images/sql/right-join-venn.png new file mode 100644 index 000000000..6035ab654 Binary files /dev/null and b/modules/reference/images/sql/right-join-venn.png differ diff --git a/modules/reference/pages/sql/comment-support.adoc b/modules/reference/pages/sql/comment-support.adoc new file mode 100644 index 000000000..e27cf1c47 --- /dev/null +++ b/modules/reference/pages/sql/comment-support.adoc @@ -0,0 +1,61 @@ += Comment Support +:description: Redpanda SQL fully supports comments in your queries. +:page-topic-type: reference + +Redpanda SQL fully supports comments in your queries. Comments provide a way to add explanatory notes and improve the readability of queries, making it easier for developers and stakeholders to understand complex queries. + +There are two types of comments in Redpanda SQL: single-line and multi-line (block). + +== Single-line comments + +A single-line comment in Redpanda SQL starts with two consecutive hyphens (`--`) and extends to the end of the line. Use these comments to annotate specific parts of a query with brief explanations or notes that help readers understand the query. + +=== Syntax + +[source,sql] +---- +-- This is an example single line comment +---- + +== Multi-line (block) comments + +Redpanda SQL also supports multi-line comments, often referred to as block comments. These comments begin with `/*` and end with `*/`, allowing for multi-line explanations or temporarily disabling sections of the query. + +=== Syntax + +[source,sql] +---- +/* +This is an example multi-line comment. +It can span multiple lines and is useful for providing detailed explanations.
+*/ +---- + +== Comment placement + +Because a single-line comment runs to the end of the line, place it after the code it describes or on a line of its own. Block comments have an explicit end marker, so you can place them anywhere within a query, including in the middle of a line. + +=== Comment on a single line + +[source,sql] +---- +SELECT column1, column2 -- This is an example single line comment +FROM table_name; +---- + +=== Comment on multiple lines + +[source,sql] +---- +SELECT /* comment 1 */ column1, column2 +FROM table_name /* comment 2 */ +WHERE column3 = 42 /* comment 3 */ ; +---- + +== Best practices for commenting + +To maximize the benefits of comments in Redpanda SQL queries, follow these best practices: + +* Be concise. Write clear and concise comments that provide meaningful insights into the specific parts of the query. +* Update comments during code changes. Whenever the query is modified, update the associated comments to reflect the changes accurately. +* Avoid over-commenting. While comments are helpful, excessive commenting can clutter the code and reduce readability. diff --git a/modules/reference/pages/sql/index.adoc b/modules/reference/pages/sql/index.adoc new file mode 100644 index 000000000..2e4a5c38d --- /dev/null +++ b/modules/reference/pages/sql/index.adoc @@ -0,0 +1,12 @@ += SQL Reference +:description: This section provides information about the syntax and semantics of SQL queries, clauses, data types, and functions that Redpanda SQL supports. + +This section provides information about the syntax and semantics of SQL queries, clauses, data types, and functions that Redpanda SQL supports. The reference is organized into the following groups: + +* xref:reference:sql/sql-statements/index.adoc[SQL Statements]. Create and manage catalogs, storage connections, and tables, and query data with statements such as `SELECT`. +* xref:reference:sql/sql-clauses/index.adoc[SQL Clauses].
Filter, group, sort, and combine query results using clauses such as `WHERE`, `GROUP BY`, and `ORDER BY`. +* xref:reference:sql/sql-data-types/index.adoc[SQL Data Types]. Learn about the supported data types, such as text, timestamp, numeric, and more. +* xref:reference:sql/sql-functions/index.adoc[SQL Functions]. Use built-in functions to work with Boolean, numeric, string, timestamp, and JSON values, and to compute aggregate and window results. +* xref:reference:sql/schema.adoc[Schemas]. Learn about schemas, the namespaces that organize tables and other database objects in a database. +* xref:reference:sql/comment-support.adoc[Comment Support]. Add comments in your queries for better documentation and collaboration. +* xref:reference:sql/transactions.adoc[Transactions]. Learn more about managing your transactions. diff --git a/modules/reference/pages/sql/redpanda-catalogs.adoc b/modules/reference/pages/sql/redpanda-catalogs.adoc new file mode 100644 index 000000000..d7e3048ab --- /dev/null +++ b/modules/reference/pages/sql/redpanda-catalogs.adoc @@ -0,0 +1,81 @@ += Redpanda Catalogs +:description: Redpanda catalogs are named connections that map Redpanda topics to queryable SQL tables. +:page-topic-type: reference + +Redpanda catalogs are named connections that let you query Redpanda topics using standard SQL. The catalog model consists of three core concepts: + +* Catalogs: Named connections to a Redpanda cluster, created with xref:reference:sql/sql-statements/create-redpanda-catalog.adoc[CREATE REDPANDA CATALOG]. +* Tables: Redpanda topics mapped as queryable SQL tables using the `catalog_name\=>table_name` syntax, created with xref:reference:sql/sql-statements/create-table.adoc[CREATE TABLE]. +* Storage connections: Named connections to external object storage such as Amazon S3, created with xref:reference:sql/sql-statements/create-storage.adoc[CREATE STORAGE]. + +NOTE: Redpanda SQL operates in read-only mode.
Data mutation operations such as `INSERT`, `UPDATE`, and `DELETE` are not available. Data is ingested into Redpanda topics and made queryable through catalog mappings. + +== Typical workflow + +To query Redpanda topic data with SQL: + +. Create a catalog connection: ++ +[source,sql] +---- +CREATE REDPANDA CATALOG my_catalog +WITH ( + initial_brokers = 'broker1:9092', + schema_registry_url = 'http://schema-registry:8081' +); +---- + +. Map a topic as a table: ++ +[source,sql] +---- +CREATE TABLE my_catalog=>user_events +WITH (topic = 'user-events'); +---- + +. Query the data: ++ +[source,sql] +---- +SELECT * FROM my_catalog=>user_events LIMIT 10; +---- + +== Related statements + +[cols="<40%,<60%",options="header"] +|=== +|Statement |Description + +|xref:reference:sql/sql-statements/create-redpanda-catalog.adoc[CREATE REDPANDA CATALOG] +|Create a catalog connection to a Redpanda cluster. + +|xref:reference:sql/sql-statements/alter-redpanda-catalog.adoc[ALTER REDPANDA CATALOG] +|Modify connection properties of an existing catalog. + +|xref:reference:sql/sql-statements/create-table.adoc[CREATE TABLE] +|Map a Redpanda topic to a SQL table through a catalog. + +|xref:reference:sql/sql-statements/alter-table.adoc[ALTER TABLE] +|Modify options of an existing catalog table. + +|xref:reference:sql/sql-statements/drop-table.adoc[DROP TABLE] +|Remove a catalog table mapping. + +|xref:reference:sql/sql-statements/drop-redpanda-catalog.adoc[DROP REDPANDA CATALOG] +|Remove a Redpanda catalog connection. + +|xref:reference:sql/sql-statements/drop-storage.adoc[DROP STORAGE] +|Remove a named storage definition. + +|xref:reference:sql/sql-statements/show-tables.adoc[SHOW TABLES] +|List tables within a catalog. + +|xref:reference:sql/sql-statements/describe.adoc[DESCRIBE] +|Show details about a catalog or catalog table. + +|xref:reference:sql/sql-statements/create-storage.adoc[CREATE STORAGE] +|Create a connection to external object storage. 
+ +|xref:reference:sql/sql-statements/alter-storage.adoc[ALTER STORAGE] +|Modify an existing storage connection. +|=== diff --git a/modules/reference/pages/sql/schema.adoc b/modules/reference/pages/sql/schema.adoc new file mode 100644 index 000000000..f30f4e2bb --- /dev/null +++ b/modules/reference/pages/sql/schema.adoc @@ -0,0 +1,228 @@ += Schemas +:description: Schemas are namespaces that organize tables and other database objects in Redpanda SQL. +:page-topic-type: reference + +A schema is a namespace that groups related database objects, including tables, views, indexes, sequences, data types, operators, and functions. Schemas let you organize objects into logical groups so their names do not collide. + +Redpanda SQL supports multiple schemas in a single database. For example, in a database named `mydb`, you might define schemas such as `auth`, `model`, and `business`. + +== Default schema + +By default, Redpanda SQL uses the `public` schema. When you reference an unqualified table name, it is equivalent to `public.table_name`. The same rule applies to `CREATE`, `DROP`, and `SELECT` statements. + +== Syntax + +=== Create a schema + +[source,sql] +---- +CREATE SCHEMA [IF NOT EXISTS] schema_name; +---- + +* `schema_name`: Name of the schema to create. +* `IF NOT EXISTS`: Optional. Prevents an error if the schema already exists. + +=== Create a table in a schema + +[source,sql] +---- +CREATE TABLE schema_name.table_name( +... +); +---- + +* `schema_name`: Name of an existing schema. +* `table_name`: Name of the table to create. + +=== Query a table in a schema + +[source,sql] +---- +SELECT * FROM schema_name.table_name; +---- + +* `schema_name`: Name of the schema. +* `table_name`: Name of the table to query. + +=== Drop a schema + +To drop an empty schema: + +[source,sql] +---- +DROP SCHEMA [IF EXISTS] schema_name; +---- + +* `schema_name`: Name of the schema to drop. +* `IF EXISTS`: Optional. Prevents an error if the schema does not exist. 
+ +To drop a schema that contains tables, use `CASCADE` to drop the schema and its contents: + +[source,sql] +---- +DROP SCHEMA schema_name CASCADE; +---- + +== Examples + +=== Create a schema and table + +. Create a schema: ++ +[source,sql] +---- +CREATE SCHEMA app_data; +---- + +. Create a table in the `app_data` schema and insert rows: ++ +[source,sql] +---- +CREATE TABLE app_data.functions( + id int, + function_name text, + active bool +); + +INSERT INTO app_data.functions(id, function_name, active) +VALUES +('1111', 'Numeric', 'TRUE'), +('2222', 'Text', 'TRUE'), +('3333', 'Timestamp', 'TRUE'), +('4444', 'JSON', 'TRUE'), +('5555', 'Boolean', 'TRUE'); +---- + +. Verify the table contents: ++ +[source,sql] +---- +SELECT * FROM app_data.functions; +---- + +. The query returns: ++ +[source,sql] +---- ++------+---------------+---------+ +| id | function_name | active | ++------+---------------+---------+ +| 1111 | Numeric | t | +| 2222 | Text | t | +| 3333 | Timestamp | t | +| 4444 | JSON | t | +| 5555 | Boolean | t | ++------+---------------+---------+ +---- + +=== Use IF NOT EXISTS + +The `IF NOT EXISTS` option lets the `CREATE SCHEMA` query succeed without error when the schema already exists. The schema is not modified. + +==== Without IF NOT EXISTS + +. Create a schema: ++ +[source,sql] +---- +CREATE SCHEMA analytics; +---- ++ +The query returns: ++ +[source,sql] +---- +CREATE SCHEMA +---- + +. Run the same `CREATE SCHEMA` statement again. The query fails with an error: ++ +[source,sql] +---- +CREATE SCHEMA analytics; +---- ++ +The query returns: ++ +[source,sql] +---- +ERROR: Schema: analytics already exists +---- + +==== With IF NOT EXISTS + +Run the same `CREATE SCHEMA` statement with `IF NOT EXISTS`. 
The query succeeds without error: + +[source,sql] +---- +CREATE SCHEMA IF NOT EXISTS analytics; +---- + +[source,sql] +---- +CREATE +---- + +=== Drop a schema + +To drop a schema and all its tables, use `CASCADE`: + +[source,sql] +---- +DROP SCHEMA app_data CASCADE; +---- + +If the schema is empty, you can drop it without `CASCADE`: + +[source,sql] +---- +DROP SCHEMA app_data; +---- + +=== Use IF EXISTS + +The `IF EXISTS` option lets the `DROP SCHEMA` query succeed without error when the schema does not exist. + +==== Without IF EXISTS + +. Drop a schema: ++ +[source,sql] +---- +DROP SCHEMA analytics; +---- ++ +The query returns: ++ +[source,sql] +---- +DROP +---- + +. Run the same `DROP SCHEMA` statement again. The query fails with an error: ++ +[source,sql] +---- +DROP SCHEMA analytics; +---- ++ +The query returns: ++ +[source,sql] +---- +ERROR: schema "analytics" does not exist +---- + +==== With IF EXISTS + +Run the same `DROP SCHEMA` statement with `IF EXISTS`. The query succeeds without error: + +[source,sql] +---- +DROP SCHEMA IF EXISTS analytics; +---- + +[source,sql] +---- +DROP +---- diff --git a/modules/reference/pages/sql/sql-clauses/from/from.adoc b/modules/reference/pages/sql/sql-clauses/from/from.adoc new file mode 100644 index 000000000..481e1ee3f --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/from/from.adoc @@ -0,0 +1,240 @@ += FROM +:description: The FROM clause specifies the tables that a query reads data from. +:page-topic-type: reference + +The `FROM` clause specifies the tables that a query reads data from. + +== Syntax + +A `FROM` clause must list at least one table: + +[source,sql] +---- +query FROM table_name; +---- + +If two or more tables are listed, they are joined with one of: xref:reference:sql/sql-clauses/from/join.adoc[JOIN], xref:reference:sql/sql-clauses/from/right-join.adoc[RIGHT JOIN], xref:reference:sql/sql-clauses/from/left-join.adoc[LEFT JOIN], or xref:reference:sql/sql-clauses/from/outer-join.adoc[OUTER JOIN]. 
+ +[source,sql] +---- +FROM table1_name +[ { JOIN + | LEFT JOIN + | RIGHT JOIN + | OUTER JOIN } table2_name +ON table1_name.column1 = table2_name.column1 ] +---- + +[NOTE] +==== +These examples use the `public` schema, the default in Redpanda SQL. For information on creating or displaying tables in other schemas, see xref:reference:sql/schema.adoc[Schema]. +==== + +== Examples + +This example uses the `FROM` clause with a single table. Given a `client` table, the goal is to retrieve the client's name and the country where the company is based. + +[source,sql] +---- +CREATE TABLE client ( + client_id int, + client_name text, + client_origin text +); +INSERT INTO client + (client_id, client_name, client_origin) +VALUES + (181891,'Toyota','Japan'), + (181892,'Google','USA'), + (181893,'Samsung','South Korea'); +---- + +[source,sql] +---- +SELECT * FROM client; +---- + +This returns: + +[source,sql] +---- ++------------+--------------+------------------+ +| client_id | client_name | client_origin | ++------------+--------------+------------------+ +| 181891 | Toyota | Japan | +| 181892 | Google | USA | +| 181893 | Samsung | South Korea | ++------------+--------------+------------------+ +---- + +. Run the following query: ++ +[source,sql] +---- +SELECT client_name, client_origin FROM client; +---- + +. The query returns: ++ +[source,sql] +---- ++--------------+------------------+ +| client_name | client_origin | ++--------------+------------------+ +| Toyota | Japan | +| Google | USA | +| Samsung | South Korea | ++--------------+------------------+ +---- + +[TIP] +==== +For multi-table queries, see: xref:reference:sql/sql-clauses/from/join.adoc[JOIN], xref:reference:sql/sql-clauses/from/right-join.adoc[RIGHT JOIN], xref:reference:sql/sql-clauses/from/left-join.adoc[LEFT JOIN], or xref:reference:sql/sql-clauses/from/outer-join.adoc[OUTER JOIN]. +==== + +== Subqueries with FROM + +The `FROM` clause can also specify a subquery.
The result of the subquery becomes a new relation that the outer query can reference. + +[NOTE] +==== +You can list more than one table by separating them with a comma (`,`). This is an implicit (cross) join: `FROM t1, t2 WHERE t1.id = t2.id` is equivalent to `FROM t1 JOIN t2 ON t1.id = t2.id`. Without a `WHERE` clause, the result is the Cartesian product of all rows in both tables. +==== + +=== Syntax + +[source,sql] +---- +SELECT X.column1, X.column2, X.column3 +FROM table_2 as X, table_1 as Y +WHERE conditions (X.column, Y.column); +---- + +. The subquery in the first `FROM` clause selects the columns from the specific table using a new temporary relation (`SELECT X.column1, X.column2, X.column3 FROM`). +. Set the tables into a new temporary relation (`table_2 as X, table_1 as Y`). +. The query is evaluated, selecting only those rows from the temporary relation that fulfill the conditions stated in the `WHERE` clause. + +=== Example + +The following example finds a product whose price exceeds the average budget across all categories. 
+ +[source,sql] +---- +CREATE TABLE product ( + id int, + product text, + category text, + price int +); +INSERT INTO product + (id, product, category, price) +VALUES + (445747,'Court vision women''s shoes nike','Shoes', 8000), + (445641,'Disney kids h&m','Shirt', 6500), + (477278,'Defacto adidas','Hat', 8500), + (481427,'Sophie shopping bag','Bag', 6500), + (411547,'Candy skirt zara','Skirt', 6500), + (488198,'Slim cut skirt hush puppies','Skirt', 7600); +---- + +[source,sql] +---- +SELECT * FROM product; +---- + +This returns: + +[source,sql] +---- ++---------+----------------------------------+-----------+--------+ +| id | product | category | price | ++---------+----------------------------------+-----------+--------+ +| 445747 | Court vision women's shoes nike | Shoes | 8000 | +| 445641 | Disney kids h&m | Shirt | 6500 | +| 477278 | Defacto adidas | Hat | 8500 | +| 481427 | Sophie shopping bag | Bag | 6500 | +| 411547 | Candy skirt zara | Skirt | 6500 | +| 488198 | Slim cut skirt hush puppies | Skirt | 7600 | ++---------+----------------------------------+-----------+--------+ +---- + +Create a `category` table: + +[source,sql] +---- +CREATE TABLE category ( + categoryName text, + budget int +); +INSERT INTO category + (categoryName, budget) +VALUES + ('Shoes', 7000), + ('Shirt', 9000), + ('Bag', 8000), + ('Skirt', 7500), + ('Hat', 7000); +---- + +[source,sql] +---- +SELECT * FROM category; +---- + +This returns: + +[source,sql] +---- ++---------------+----------+ +| categoryName | budget | ++---------------+----------+ +| Shoes | 7000 | +| Shirt | 9000 | +| Bag | 8000 | +| Skirt | 7500 | +| Hat | 7000 | ++---------------+----------+ +---- + +. Run the following query to find the average budget across all categories: ++ +[source,sql] +---- +select avg(budget) as avgBudget from category; +---- + +. 
The query returns the average budget across all categories: ++ +[source,sql] +---- ++--------------------+ +| avgbudget | ++--------------------+ +| 7700.000000000000 | ++--------------------+ +---- + +. Run: ++ +* The `product` table is aliased as `P` and the budget's average value from the `category` table as `C`. +* The query displays the product's name, category, and price. +* The conditions are set where the product's price exceeds the budget's average value. ++ +[source,sql] +---- +select P.product, P.category, P.price from +(select avg(budget) as avgBudget from category) as C, product as P +where P.price > C.avgBudget; +---- + +. The query returns the products with a price greater than 7700: ++ +[source,sql] +---- ++------------------------------------+-----------+----------+ +| product | category | price | ++------------------------------------+-----------+----------+ +| Court vision women's shoes nike | Shoes | 8000 | +| Defacto adidas | Hat | 8500 | ++------------------------------------+-----------+----------+ +---- diff --git a/modules/reference/pages/sql/sql-clauses/from/index.adoc b/modules/reference/pages/sql/sql-clauses/from/index.adoc new file mode 100644 index 000000000..7cca9b355 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/from/index.adoc @@ -0,0 +1,3 @@ += FROM +:description: Reference for the FROM clause and JOIN variants in Redpanda SQL. +:page-layout: index diff --git a/modules/reference/pages/sql/sql-clauses/from/join.adoc b/modules/reference/pages/sql/sql-clauses/from/join.adoc new file mode 100644 index 000000000..410a537f6 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/from/join.adoc @@ -0,0 +1,155 @@ += JOIN +:description: The JOIN clause combines records from two tables based on common fields. +:page-topic-type: reference + +The `JOIN` clause combines records from two tables based on common fields. + +== Syntax + +=== JOIN ... ON + +[source,sql] +---- +SELECT table_1.column_1, table_2.column_2... 
+FROM table_1 +JOIN table_2 +ON table_1.common_field = table_2.common_field +---- + +. `SELECT table_1.column_1, table_2.column_2...` selects the columns to display from both tables. +. `FROM table_1 JOIN table_2` represents the joined tables. +. `ON table_1.common_field = table_2.common_field` compares each row of `table_1` with each row of `table_2` to find all pairs of rows that meet the join condition. +. When the condition is met, Redpanda SQL combines column values for each matched pair of rows from `table_1` and `table_2` into a result row. + +=== JOIN ... USING + +[source,sql] +---- +SELECT column_1, column_2... +FROM table_1 +JOIN table_2 +USING (column_name [, column_name2 ...]) +---- + +The `USING` clause is a shorthand for joining tables when both tables share columns with the same name. Instead of writing `ON table_1.id = table_2.id`, you can write `USING (id)`. When joining on multiple shared columns, separate them with commas: `USING (id, name)`. + +=== Table alias + +A table alias is a temporary name given to a table, column, or expression in a query. Aliases don't change the result; they make queries easier to read. + +[source,sql] +---- +SELECT left.column_1, right.column_2... +FROM table_1 as left +JOIN table_2 as right +ON left.common_field = right.common_field +---- + +== Examples + +The following examples use two tables: `movies` and `categories`. 
+ +Create the `movies` table: + +[source,sql] +---- +CREATE TABLE movies ( + movie_id int, + movie_name text, + category_id int +); +INSERT INTO movies + (movie_id, movie_name, category_id) +VALUES + (201011, 'The Avengers', 181893), + (200914, 'Avatar', 181894), + (201029, 'Shutter Island', 181891), + (201925, 'Tune in Your Love', 181892); +---- + +[source,sql] +---- +SELECT * FROM movies; +---- + +This returns: + +[source,sql] +---- ++------------+-----------------------+--------------+ +| movie_id | movie_name | category_id | ++------------+-----------------------+--------------+ +| 201011 | The Avengers | 181893 | +| 200914 | Avatar | 181894 | +| 201029 | Shutter Island | 181891 | +| 201925 | Tune in Your Love | 181892 | ++------------+-----------------------+--------------+ +---- + +Create the `categories` table: + +[source,sql] +---- +CREATE TABLE categories ( + id int, + category_name text +); +INSERT INTO categories + (id, category_name) +VALUES + (181891, 'Psychological Thriller'), + (181892, 'Romance'), + (181893, 'Fantasy'), + (181894, 'Science Fiction'), + (181895, 'Action'); +---- + +[source,sql] +---- +SELECT * FROM categories; +---- + +This returns: + +[source,sql] +---- ++-----------+--------------------------+ +| id | category_name | ++-----------+--------------------------+ +| 181891 | Psychological Thriller | +| 181892 | Romance | +| 181893 | Fantasy | +| 181894 | Science Fiction | +| 181895 | Action | ++-----------+--------------------------+ +---- + +A `JOIN` query against these tables: + +[source,sql] +---- +SELECT a.movie_name, c.category_name +FROM movies AS a +JOIN categories AS c +ON a.category_id = c.id; +---- + +The query returns: + +[source,sql] +---- ++-----------------------+---------------------------+ +| movie_name | category_name | ++-----------------------+---------------------------+ +| Shutter Island | Psychological Thriller | +| Tune in Your Love | Romance | +| The Avengers | Fantasy | +| Avatar | Science Fiction | 
++-----------------------+---------------------------+ +---- + +The `JOIN` checks each row of the `category_id` column in the `movies` table against the `id` column of each row in the `categories` table. When the values match, Redpanda SQL creates a new result row that combines columns from both tables. + +The following Venn diagram illustrates this example: + +image::sql/join-venn.png[Venn diagram showing an inner join between the Movies and Categories tables] diff --git a/modules/reference/pages/sql/sql-clauses/from/left-join.adoc b/modules/reference/pages/sql/sql-clauses/from/left-join.adoc new file mode 100644 index 000000000..659a4741f --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/from/left-join.adoc @@ -0,0 +1,171 @@ += LEFT JOIN +:description: The LEFT JOIN returns all matching records from the left table combined with the right table. +:page-topic-type: reference + +The `LEFT JOIN` returns all matching records from the left table combined with the right table. Even if there are no matching records in the right table, the `LEFT JOIN` still returns a row in the result, with `NULL` in each column from the right table. + +[NOTE] +==== +`LEFT JOIN` is also known as `LEFT OUTER JOIN`. +==== + +== Syntax + +[source,sql] +---- +SELECT column_1, column_2... +FROM table_1 +LEFT JOIN table_2 +ON table_1.matching_field = table_2.matching_field; +---- + +In this syntax: + +. `SELECT column_1, column_2...` defines the columns from both tables where the data is to be selected. +. `FROM table_1` defines the left table as the main table in the `FROM` clause. +. `LEFT JOIN table_2` defines the right table as the table the main table joins. +. `ON table_1.matching_field = table_2.matching_field` sets the join condition after the `ON` keyword with the matching field between the two tables. + +=== Table alias + +A table alias is a temporary name given to a table in a query. Aliases don't change the result; they make queries easier to read. 
+ +[source,sql] +---- +SELECT A.column_1, B.column_2... +FROM table_1 A +LEFT JOIN table_2 B +ON A.matching_field = B.matching_field; +---- + +== Examples + +Create the `item` table: + +[source,sql] +---- +CREATE TABLE item ( + item_no int NOT NULL, + item_name text +); + +INSERT INTO item + (item_no,item_name) +VALUES + (111,'Butter'), + (113,'Tea'), + (116,'Bread'), + (119,'Coffee'); +---- + +[source,sql] +---- +SELECT * FROM item; +---- + +This returns: + +[source,sql] +---- ++-----------+----------------+ +| item_no | item_name | ++-----------+----------------+ +| 111 | Butter | +| 113 | Tea | +| 116 | Bread | +| 119 | Coffee | ++-----------+----------------+ +---- + +Create the `invoice` table: + +[source,sql] +---- +CREATE TABLE invoice ( + inv_no int NOT NULL, + item int, + sold_qty int, + sold_price int +); + +INSERT INTO invoice + (inv_no, item, sold_qty, sold_price) +VALUES + (020219,111,3,9000), + (020220,116,6,30000), + (020221,116,2,10000), + (020222,116,1,5000), + (020223,119,5,20000), + (020224,119,4,16000); +---- + +[source,sql] +---- +SELECT * FROM invoice; +---- + +This returns: + +[source,sql] +---- ++----------+---------+-----------+-------------+ +| inv_no | item | sold_qty | sold_price | ++----------+---------+-----------+-------------+ +| 20219 | 111 | 3 | 9000 | +| 20220 | 116 | 6 | 30000 | +| 20221 | 116 | 2 | 10000 | +| 20222 | 116 | 1 | 5000 | +| 20223 | 119 | 5 | 20000 | +| 20224 | 119 | 4 | 16000 | ++----------+---------+-----------+-------------+ +---- + +A `LEFT JOIN` query against these tables: + +[source,sql] +---- +SELECT item_no, item_name, sold_qty, sold_price +FROM item +LEFT JOIN invoice +ON item.item_no = invoice.item; +---- + +* The `item` table is the left table, and the `invoice` table is the right table. +* The query combines values from the `item` table using `item_no` and matches records using the `item` column from the `invoice` table. 
+* When records match, Redpanda SQL creates a new row with `item_no`, `item_name`, `sold_qty`, and `sold_price` columns as defined in the `SELECT` clause. +* Otherwise, a new row is created with `NULL` values from the right table (`invoice`). + +The query returns: + +[source,sql] +---- ++-----------+-------------+------------+---------------+ +| item_no | item_name | sold_qty | sold_price | ++-----------+-------------+------------+---------------+ +| 111 | Butter | 3 | 9000 | +| 113 | Tea | null | null | +| 116 | Bread | 6 | 30000 | +| 116 | Bread | 2 | 10000 | +| 116 | Bread | 1 | 5000 | +| 119 | Coffee | 5 | 20000 | +| 119 | Coffee | 4 | 16000 | ++-----------+-------------+------------+---------------+ +---- + +Based on the data from the `item` and `invoice` tables: + +* The result matches the total items in the `item` table: four items. +* The result displays all items from the left table (`item`), even if one item has not been sold. +* Item id `111` (`Butter`) has been sold once for 3 pieces at 9000. +* Item id `113` (`Tea`) has never been sold, so the `sold_qty` and `sold_price` columns are `NULL`. +* Item id `116` (`Bread`) has been sold three times: 6 pieces at 30000, 2 pieces at 10000, and 1 piece at 5000. +* Item id `119` (`Coffee`) has been sold twice: 5 pieces at 20000 and 4 pieces at 16000. + +[TIP] +==== +An `item` can have zero or many invoices. An `invoice` belongs to zero or one `item`. 
+==== + +The following Venn diagram illustrates the `LEFT JOIN`: + +image::sql/left-join-venn.png[Venn diagram showing a left join between the Item and Invoice tables] diff --git a/modules/reference/pages/sql/sql-clauses/from/outer-join.adoc b/modules/reference/pages/sql/sql-clauses/from/outer-join.adoc new file mode 100644 index 000000000..862502621 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/from/outer-join.adoc @@ -0,0 +1,220 @@ += OUTER JOIN +:description: The OUTER JOIN returns all the records from the selected fields between two tables, whether the join condition is met or not. +:page-topic-type: reference + +The `OUTER JOIN` (also known as `FULL OUTER JOIN`) returns all the records from the selected fields between the two tables (left and right), whether the join condition is met or not. + +== Inner join vs. outer join + +An `INNER JOIN` returns only matched rows. An `OUTER JOIN` returns all rows from both tables, whether matched or not. + +== Syntax + +[source,sql] +---- +SELECT column_1, column_2... +FROM table_1 +FULL OUTER JOIN table_2 +ON table_1.matching_field = table_2.matching_field; +---- + +In this syntax: + +. `SELECT column_1, column_2...` defines the columns from both tables to display data. +. `FROM table_1` represents `table_1` as the left table in the `FROM` clause. +. `FULL OUTER JOIN table_2` represents `table_2` as the right table in the `FULL OUTER JOIN` condition. +. `ON table_1.matching_field = table_2.matching_field` sets the join condition after the `ON` keyword with the matching field between the two tables. + +=== Table alias + +A table alias is a temporary name given to a table in a query. Aliases don't change the result; they make queries easier to read. + +[source,sql] +---- +SELECT A.column_1, B.column_2... 
+FROM table_1 A +FULL OUTER JOIN table_2 B +ON A.matching_field = B.matching_field; +---- + +[NOTE] +==== +If there are no matched records from the joined tables, `NULL` values are returned in every column of the table that does not have the matching record. +==== + +== Examples + +Create the `departments` table: + +[source,sql] +---- +CREATE TABLE departments ( + department_id int, + department_name text +); +INSERT INTO departments (department_id,department_name) +VALUES + (1001, 'Sales'), + (1002, 'Marketing'), + (1003, 'HR'), + (1004, 'Project'), + (1005, 'Product'); +---- + +[source,sql] +---- +SELECT * FROM departments; +---- + +This returns: + +[source,sql] +---- ++----------------+------------------+ +| department_id | department_name | ++----------------+------------------+ +| 1001 | Sales | +| 1002 | Marketing | +| 1003 | HR | +| 1004 | Project | +| 1005 | Product | ++----------------+------------------+ +---- + +Create the `employee` table: + +[source,sql] +---- +CREATE TABLE employee ( + employee_id int, + employee_name text, + dept_id int +); +INSERT INTO employee ( + employee_id, + employee_name, + dept_id +) +VALUES + (2001,'Tony Stark', 1002), + (2002,'Christian Bale', 1002), + (2003,'Anne Hailey', 1003), + (2004,'Wilson Cliff', 1004), + (2005,'Susan Oh', 1001), + (2006,'Julian Robert', 1001), + (2007,'Gilbert Tom', null); +---- + +[source,sql] +---- +SELECT * FROM employee; +---- + +This returns: + +[source,sql] +---- ++--------------+-------------------+------------+ +| employee_id | employee_name | dept_id | ++--------------+-------------------+------------+ +| 2001 | Tony Stark | 1002 | +| 2002 | Christian Bale | 1002 | +| 2003 | Anne Hailey | 1003 | +| 2004 | Wilson Cliff | 1004 | +| 2005 | Susan Oh | 1001 | +| 2006 | Julian Robert | 1001 | +| 2007 | Gilbert Tom | null | ++--------------+-------------------+------------+ +---- + +=== FULL OUTER JOIN + +A `FULL OUTER JOIN` query against these tables: + +[source,sql] +---- +SELECT employee_name, 
department_name +FROM departments +FULL OUTER JOIN employee +ON departments.department_id = employee.dept_id; +---- + +The result shows every department with an employee, the employees who work under a specific department, every department that does not have any employees, and the employees who do not belong to a specific department. + +[source,sql] +---- ++-------------------+-------------------+ +| employee_name | department_name | ++-------------------+-------------------+ +| Julian Robert | Sales | +| Susan Oh | Sales | +| Christian Bale | Marketing | +| Tony Stark | Marketing | +| Anne Hailey | HR | +| Wilson Cliff | Project | +| Gilbert Tom | null | +| null | Product | ++-------------------+-------------------+ +---- + +The following Venn diagram illustrates the `FULL OUTER JOIN`: + +image::sql/outer-join-venn.png[Venn diagram showing a full outer join between the Departments and Employee tables] + +=== FULL OUTER JOIN with WHERE clause + +==== Find departments with no employees + +. To find departments that do not have any employees, add a `WHERE` clause with `NULL`: ++ +[source,sql] +---- +SELECT employee_name, department_name +FROM departments +FULL OUTER JOIN employee +ON departments.department_id = employee.dept_id +WHERE employee_name IS NULL; +---- + +. The query returns: ++ +[source,sql] +---- ++------------------+--------------------+ +| employee_name | department_name | ++------------------+--------------------+ +| null | Product | ++------------------+--------------------+ +---- + +The result indicates that the `Product` department has no employees. + +==== Find employees with no department + +. To find employees who do not belong to any department, add a `WHERE` clause with `NULL`: ++ +[source,sql] +---- +SELECT employee_name, department_name +FROM employee +FULL OUTER JOIN departments +ON employee.dept_id = departments.department_id +WHERE department_name IS NULL; +---- + +. 
The query returns: ++ +[source,sql] +---- ++------------------+--------------------+ +| employee_name | department_name | ++------------------+--------------------+ +| Gilbert Tom | null | ++------------------+--------------------+ +---- + +The result shows that Gilbert Tom does not belong to any department. + +The following Venn diagram illustrates the `FULL OUTER JOIN` with `WHERE` clause filtering for NULL values: + +image::sql/outer-join-where-venn.png[Venn diagram showing a full outer join with WHERE clause filtering for NULL values] diff --git a/modules/reference/pages/sql/sql-clauses/from/right-join.adoc b/modules/reference/pages/sql/sql-clauses/from/right-join.adoc new file mode 100644 index 000000000..c882148ff --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/from/right-join.adoc @@ -0,0 +1,165 @@ += RIGHT JOIN +:description: The RIGHT JOIN returns all matching records from the right table combined with the left table. +:page-topic-type: reference + +The `RIGHT JOIN` returns all matching records from the right table combined with the left table. Even if there are no matching records in the left table, the `RIGHT JOIN` still returns a row in the result, with `NULL` in each column from the left table. + +== Syntax + +[source,sql] +---- +SELECT column_1, column_2... +FROM table_1 +RIGHT JOIN table_2 +ON table_1.matching_field = table_2.matching_field; +---- + +In this syntax: + +. `SELECT column_1, column_2...` defines the columns from both tables to display. +. `FROM table_1` defines `table_1` as the left table in the `FROM` clause. +. `RIGHT JOIN table_2` defines `table_2` as the right table in the `RIGHT JOIN` condition. +. `ON table_1.matching_field = table_2.matching_field` sets the join condition after the `ON` keyword with the matching field between the two tables. + +=== Table alias + +A table alias is a temporary name given to a table in a query. Aliases don't change the result; they make queries easier to read. 
+ +[source,sql] +---- +SELECT A.column_1, B.column_2... +FROM table_1 A +RIGHT JOIN table_2 B +ON A.matching_field = B.matching_field; +---- + +== Examples + +Create the `customer` table: + +[source,sql] +---- +CREATE TABLE customer ( + id int NOT NULL, + customer_name text +); + +INSERT INTO customer + (id, customer_name) +VALUES + (201011,'James'), + (200914,'Harry'), + (201029,'Ellie'), + (201925,'Mary'); +---- + +[source,sql] +---- +SELECT * FROM customer; +---- + +This returns: + +[source,sql] +---- ++-----------+----------------+ +| id | customer_name | ++-----------+----------------+ +| 201011 | James | +| 200914 | Harry | +| 201029 | Ellie | +| 201925 | Mary | ++-----------+----------------+ +---- + +Create the `orders` table: + +[source,sql] +---- +CREATE TABLE orders ( + order_id int NOT NULL, + order_date date, + order_amount int, + customer_id int +); + +INSERT INTO orders + (order_id, order_date, order_amount, customer_id) +VALUES + (181893,'2021-10-08',3000,201029), + (181894,'2021-11-18',2000,201029), + (181891,'2021-09-10',9000,201011), + (181892,'2021-10-10',7000,201925), + (181897,'2022-05-27',6700,null), + (181899,'2021-07-22',4500,201011); +---- + +[source,sql] +---- +SELECT * FROM orders; +---- + +This returns: + +[source,sql] +---- ++------------+------------------+---------------+-------------+ +| order_id | order_date | order_amount | customer_id | ++------------+------------------+---------------+-------------+ +| 181893 | 2021-10-08 | 3000 | 201029 | +| 181894 | 2021-11-18 | 2000 | 201029 | +| 181891 | 2021-09-10 | 9000 | 201011 | +| 181892 | 2021-10-10 | 7000 | 201925 | +| 181897 | 2022-05-27 | 6700 | null | +| 181899 | 2021-07-22 | 4500 | 201011 | ++------------+------------------+---------------+-------------+ +---- + +A `RIGHT JOIN` query against these tables: + +[source,sql] +---- +SELECT customer_name, order_date, order_amount +FROM customer +RIGHT JOIN orders +ON customer.id = orders.customer_id; +---- + +* The `customer` table is 
the left table, and the `orders` table is the right table. +* The query combines values from the `orders` table using `customer_id` and matches records using the `id` column from the `customer` table. +* When records match, Redpanda SQL creates a new row with `customer_name` and `order_amount` columns as defined in the `SELECT` clause. +* Otherwise, a new row is created with `NULL` values from the left table (`customer`). + +The query returns: + +[source,sql] +---- ++------------------+----------------+-----------------+ +| customer_name | order_date | order_amount | ++------------------+----------------+-----------------+ +| James | 2021-09-10 | 9000 | +| James | 2021-07-22 | 4500 | +| Ellie | 2021-10-08 | 3000 | +| Ellie | 2021-11-18 | 2000 | +| Mary | 2021-10-10 | 7000 | +| null | 2022-05-27 | 6700 | ++------------------+----------------+-----------------+ +---- + +Based on the data from the `customer` and `orders` tables: + +* Order id `181893` matches the customer `Ellie`. +* Order id `181894` matches the customer `Ellie`. +* Order id `181891` matches the customer `James`. +* Order id `181899` matches the customer `James`. +* Order id `181892` matches the customer `Mary`. +* Order id `181897` does not match any customer, so the `customer_name` column is `NULL`. + +[NOTE] +==== +A `customer` can have zero or many `orders`. An item from `orders` belongs to zero or one `customer`. +==== + +The following Venn diagram illustrates the `RIGHT JOIN`: + +image::sql/right-join-venn.png[Venn diagram showing a right join between the Customer and Orders tables] diff --git a/modules/reference/pages/sql/sql-clauses/group-by.adoc b/modules/reference/pages/sql/sql-clauses/group-by.adoc new file mode 100644 index 000000000..5555d22dc --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/group-by.adoc @@ -0,0 +1,282 @@ += GROUP BY +:description: The GROUP BY clause returns a group of records from one or more tables that have the same values in specified columns. 
+:page-topic-type: reference + +The `GROUP BY` clause returns a group of records from one or more tables that have the same values in specified columns. The result of the `GROUP BY` clause returns a single row for each value of the column. + +[NOTE] +==== +You can use xref:reference:sql/sql-functions/aggregate-functions/index.adoc[aggregate functions] such as `COUNT()`, `MAX()`, `MIN()`, and `SUM()` to perform operations on the grouped values in the `SELECT` statement. +==== + +== Syntax + +[source,sql] +---- +SELECT +column_1, column_2, aggregate_function(column_3) +FROM +table_name +GROUP BY +column_1, column_2,...; +---- + +This syntax includes the following elements: + +* `SELECT column_1, column_2, aggregate_function(column_3)` defines the columns to group (`column_1, column_2`) and the column to apply an aggregate function to (`column_3`). +* `FROM table_name` defines the table to retrieve data from. +* `GROUP BY column_1, column_2,...;` lists the columns to group. + +[NOTE] +==== +Each column you group on must also appear in the `SELECT` list, and each column you group on must appear in the `GROUP BY` clause. +==== + +=== Syntax with WHERE clause + +The `GROUP BY` clause must appear after the `WHERE` clause: + +[source,sql] +---- +SELECT +column_1, column_2, aggregate_function(column_3) +FROM +table_name +WHERE +conditions +GROUP BY +column_1, column_2,...; +---- + +== Examples + +Assume there are two tables: `customer` and `orders`. 
+ +Create the `customer` table: + +[source,sql] +---- +CREATE TABLE customer ( + cust_id int, + cust_name text +); +INSERT INTO customer + (cust_id, cust_name) +VALUES + (11001, 'Maya'), + (11003, 'Ricky'), + (11009, 'Sean'), + (11008, 'Chris'), + (11002, 'Emily'), + (11005, 'Rue'), + (11007, 'Tom'), + (11006, 'Casey'); +---- + +[source,sql] +---- +SELECT * FROM customer; +---- + +This returns: + +[source,sql] +---- ++-----------+------------+ +| cust_id | cust_name | ++-----------+------------+ +| 11001 | Maya | +| 11003 | Ricky | +| 11009 | Sean | +| 11008 | Chris | +| 11002 | Emily | +| 11005 | Rue | +| 11007 | Tom | +| 11006 | Casey | ++-----------+------------+ +---- + +Create the `orders` table: + +[source,sql] +---- +CREATE TABLE orders ( + order_id int, + order_date date, + order_prod text, + order_qty int, + order_price int, + cust_id int +); +INSERT INTO orders + (order_id, order_date, order_prod, order_qty, order_price, cust_id) +VALUES + (999191, '2021-01-08','Butter', 1, 4000, 11001), + (999192, '2021-09-30','Sugar', 1, 10000, 11002), + (999193, '2021-04-17','Sugar', 1, 10000, 11009), + (999194, '2021-08-29','Flour', 4, 20000, 11006), + (999195, '2021-05-04','Sugar', 2, 20000, 11008), + (999196, '2021-07-27','Butter', 2, 8000, 11006), + (999197, '2021-10-30','Flour', 2, 10000, 11001), + (999198, '2021-12-18','Flour', 2, 10000, 11007); +---- + +[source,sql] +---- +SELECT * FROM orders; +---- + +This returns: + +[source,sql] +---- ++------------+--------------+--------------+-------------+---------------+-----------+ +| order_id | order_date | order_prod | order_qty | order_price | cust_id | ++------------+--------------+--------------+-------------+---------------+-----------+ +| 999191 | 2021-01-08 | Butter | 1 | 4000 | 11001 | +| 999192 | 2021-09-30 | Sugar | 1 | 10000 | 11002 | +| 999193 | 2021-04-17 | Sugar | 1 | 10000 | 11009 | +| 999194 | 2021-08-29 | Flour | 4 | 20000 | 11006 | +| 999195 | 2021-05-04 | Sugar | 2 | 20000 | 11008 | +| 999196 | 
2021-07-27 | Butter | 2 | 8000 | 11006 | +| 999197 | 2021-10-30 | Flour | 2 | 10000 | 11001 | +| 999198 | 2021-12-18 | Flour | 2 | 10000 | 11007 | ++------------+--------------+--------------+-------------+---------------+-----------+ +---- + +=== Basic GROUP BY + +This query returns all distinct product names from the `orders` table: + +[source,sql] +---- +SELECT order_prod +FROM orders +GROUP BY order_prod; +---- + +The query returns: + +[source,sql] +---- ++--------------+ +| order_prod | ++--------------+ +| Flour | +| Sugar | +| Butter | ++--------------+ +---- + +=== GROUP BY on multiple columns + +This example uses multiple columns in the `GROUP BY` clause: + +[source,sql] +---- +SELECT order_id, order_prod +FROM orders +GROUP BY order_id, order_prod; +---- + +The query returns: + +[source,sql] +---- ++-----------+--------------+ +| order_id | order_prod | ++-----------+--------------+ +| 999194 | Flour | +| 999191 | Butter | +| 999196 | Butter | +| 999192 | Sugar | +| 999195 | Sugar | +| 999198 | Flour | +| 999193 | Sugar | +| 999197 | Flour | ++-----------+--------------+ +---- + +=== GROUP BY with aggregate functions + +This example calculates the total amount each customer has paid for orders using the `SUM()` aggregate function: + +[source,sql] +---- +SELECT cust_id, SUM(order_price) +FROM orders +GROUP BY cust_id; +---- + +The query returns: + +[source,sql] +---- ++-----------+----------+ +| cust_id | sum | ++-----------+----------+ +| 11009 | 10000 | +| 11007 | 10000 | +| 11006 | 28000 | +| 11002 | 10000 | +| 11001 | 14000 | +| 11008 | 20000 | ++-----------+----------+ +---- + +=== GROUP BY with JOIN condition + +This query joins the `orders` table with the `customer` table and groups customers by name. 
It uses `COUNT()` to count the number of products each customer has purchased: + +[source,sql] +---- +SELECT C.cust_name, COUNT(order_prod) +FROM orders O +JOIN customer C ON O.cust_id = C.cust_id +GROUP BY C.cust_name; +---- + +The query returns: + +[source,sql] +---- ++------------+---------+ +| cust_name | count | ++------------+---------+ +| Tom | 1 | +| Chris | 1 | +| Casey | 2 | +| Maya | 2 | +| Sean | 1 | +| Emily | 1 | ++------------+---------+ +---- + +=== GROUP BY with date data type + +The `order_date` column uses the `DATE` data type. This example groups the order quantity and total price by date using the `DATE()` function: + +[source,sql] +---- +SELECT DATE(order_date), order_qty, SUM(order_price) +FROM orders +GROUP BY order_qty, DATE(order_date); +---- + +The query returns: + +[source,sql] +---- ++---------------+------------+---------+ +| date | order_qty | sum | ++---------------+------------+---------+ +| 2021-07-27 | 2 | 8000 | +| 2021-08-29 | 4 | 20000 | +| 2021-04-17 | 1 | 10000 | +| 2021-09-30 | 1 | 10000 | +| 2021-05-04 | 2 | 20000 | +| 2021-01-08 | 1 | 4000 | +| 2021-12-18 | 2 | 10000 | +| 2021-10-30 | 2 | 10000 | ++---------------+------------+---------+ +---- diff --git a/modules/reference/pages/sql/sql-clauses/having.adoc b/modules/reference/pages/sql/sql-clauses/having.adoc new file mode 100644 index 000000000..e5ae50e6c --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/having.adoc @@ -0,0 +1,244 @@ += HAVING +:description: The HAVING clause specifies a search condition using an aggregate function. +:page-topic-type: reference + +The `HAVING` clause specifies a search condition using an xref:reference:sql/sql-functions/aggregate-functions/index.adoc[aggregate function]. It filters the records returned from a `GROUP BY` clause that do not fulfill the specified condition. + +== Differences between WHERE and HAVING + +[cols="51%,49%",options="header"] +|=== +|WHERE |HAVING +|The `GROUP BY` clause appears after the `WHERE` clause. 
|The `GROUP BY` clause appears before the `HAVING` clause. +|The `WHERE` clause cannot work with an aggregate function. |The `HAVING` clause can work with an aggregate function. +|The `WHERE` clause filters individual records. |The `HAVING` clause filters groups of records. +|=== + +== Syntax + +[source,sql] +---- +SELECT column_1, column_2,... +FROM table_name +GROUP BY column_name(s) +HAVING condition_aggregate_function +---- + +This syntax includes the following elements: + +* `SELECT column_1, column_2,...` selects the columns to display. +* `FROM table_name` selects the table to retrieve data from. +* `GROUP BY column_name(s)` lists the columns to group. +* `HAVING condition_aggregate_function` provides the condition for filtering rows formed by the `GROUP BY` clause. The condition can use an aggregate function such as `SUM()`, `COUNT()`, or `MIN()`. + +== Examples + +Assume there are two tables: `student` and `score`. + +Create the `student` table: + +[source,sql] +---- +CREATE TABLE student ( + stud_id int, + stud_name text +); +INSERT INTO student + (stud_id, stud_name) +VALUES + (992831192, 'Mary'), + (992811191, 'Bobby'), + (992311195, 'Sean'), + (998311193, 'Harry'), + (998311194, 'William'), + (928311197, 'Kate'), + (928311190, 'Tom'), + (928311199, 'Sully'), + (998311196, 'Susan'); +---- + +[source,sql] +---- +SELECT * FROM student; +---- + +This returns: + +[source,sql] +---- ++------------+------------+ +| stud_id | stud_name | ++------------+------------+ +| 992831192 | Mary | +| 992811191 | Bobby | +| 992311195 | Sean | +| 998311193 | Harry | +| 998311194 | William | +| 928311197 | Kate | +| 928311190 | Tom | +| 928311199 | Sully | +| 998311196 | Susan | ++------------+------------+ +---- + +Create the `score` table: + +[source,sql] +---- +CREATE TABLE score ( + score_id int, + subject text, + score_val int, + stud_id int, + score_stat text +); +INSERT INTO score + (score_id, subject, score_val, stud_id, score_stat) +VALUES + (12221, 'Math', 90, 
992811191, 'PASSED'), + (12222, 'Biology', 90, 992811191, 'PASSED'), + (12223, 'Art', 80, 992831192, 'PASSED'), + (12224, 'History', 70, 928311197, 'FAILED'), + (12225, 'Physics', 75, 928311190, 'FAILED'), + (12226, 'Art', 85, 928311197, 'PASSED'), + (12227, 'Biology', 90, 998311196, 'PASSED'), + (12228, 'Biology', 70, 928311199, 'FAILED'), + (12229, 'Physics', 80, 998311194, 'PASSED'), + (12231, 'Math', 80, 998311193, 'PASSED'), + (12232, 'History', 90, 992811191, 'PASSED'), + (12233, 'Math', 70, 998311194, 'FAILED'), + (12234, 'Math', 80, 928311190, 'PASSED'); +---- + +[source,sql] +---- +SELECT * FROM score; +---- + +This returns: + +[source,sql] +---- ++-----------+----------+------------+------------+-------------+ +| score_id | subject | score_val | stud_id | score_stat | ++-----------+----------+------------+------------+-------------+ +| 12221 | Math | 90 | 992811191 | PASSED | +| 12222 | Biology | 90 | 992811191 | PASSED | +| 12223 | Art | 80 | 992831192 | PASSED | +| 12224 | History | 70 | 928311197 | FAILED | +| 12225 | Physics | 75 | 928311190 | FAILED | +| 12226 | Art | 85 | 928311197 | PASSED | +| 12227 | Biology | 90 | 998311196 | PASSED | +| 12228 | Biology | 70 | 928311199 | FAILED | +| 12229 | Physics | 80 | 998311194 | PASSED | +| 12231 | Math | 80 | 998311193 | PASSED | +| 12232 | History | 90 | 992811191 | PASSED | +| 12233 | Math | 70 | 998311194 | FAILED | +| 12234 | Math | 80 | 928311190 | PASSED | ++-----------+----------+------------+------------+-------------+ +---- + +=== HAVING clause with AVG function + +The following example uses the `AVG` aggregate function to filter subjects with an average score greater than 80: + +[source,sql] +---- +SELECT subject +FROM score +GROUP BY subject +HAVING AVG (score_val) > 80; +---- + +The query returns: + +[source,sql] +---- ++-----------+ +| subject | ++-----------+ +| Art | +| Biology | ++-----------+ +---- + +=== HAVING clause with COUNT function + +The following query lists subjects that have 
more than two `PASSED` scores: + +[source,sql] +---- +SELECT SUM(CASE WHEN score_stat = 'PASSED' THEN 1 ELSE 0 END) AS passed_count, subject +FROM score +GROUP BY subject +HAVING SUM(CASE WHEN score_stat = 'PASSED' THEN 1 ELSE 0 END) > 2; +---- + +The query returns `Math` as the only subject with more than two `PASSED` values: + +[source,sql] +---- ++--------------+---------+ +| passed_count | subject | ++--------------+---------+ +| 3 | Math | ++--------------+---------+ +---- + +=== HAVING clause with MAX function + +Assume the minimum score criterion is 75. The following query finds the maximum score for each subject where the maximum is greater than 75: + +[source,sql] +---- +SELECT subject, MAX(score_val) +FROM score +GROUP BY subject +HAVING MAX(score_val)>75; +---- + +The query returns: + +[source,sql] +---- ++-----------+--------+ +| subject | max | ++-----------+--------+ +| Math | 90 | +| History | 90 | +| Physics | 80 | +| Art | 85 | +| Biology | 90 | ++-----------+--------+ +---- + +=== HAVING with JOIN condition + +To find students who have failed in their subjects, combine the `student` and `score` tables using `JOIN` and apply a condition on the `score_stat` column: + +[source,sql] +---- +SELECT stud_name, subject, score_val, score_stat +FROM student A +JOIN score C ON A.stud_id = C.stud_id +GROUP BY stud_name, subject, score_val, score_stat +HAVING score_stat = 'FAILED'; +---- + +* The `JOIN` clause combines the two tables. +* The `GROUP BY` clause groups the joined records by the specified columns. +* The `HAVING` clause filters the records returned from the `GROUP BY` clause according to the specified condition.
+ +The query returns: + +[source,sql] +---- ++------------+------------+------------+--------------+ +| stud_name | subject | score_val | score_stat | ++------------+------------+------------+--------------+ +| Kate | History | 70 | FAILED | +| Sully | Biology | 70 | FAILED | +| Tom | Physics | 75 | FAILED | +| William | Math | 70 | FAILED | ++------------+------------+------------+--------------+ +---- diff --git a/modules/reference/pages/sql/sql-clauses/index.adoc b/modules/reference/pages/sql/sql-clauses/index.adoc new file mode 100644 index 000000000..3d6333c33 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/index.adoc @@ -0,0 +1,22 @@ += SQL clauses +:description: SQL clauses define how data is retrieved, filtered, and manipulated. +:page-topic-type: reference + +SQL clauses define how data is retrieved, filtered, and manipulated. They specify what data to include, how to organize it, and the conditions rows must meet to appear in the result set. + +Redpanda SQL supports the following clauses: + +[cols="<40%,<60%",options="header"] +|=== +|Clause |Description +|xref:reference:sql/sql-clauses/from/from.adoc[FROM] |Defines the source tables or views for the query. +|xref:reference:sql/sql-clauses/where.adoc[WHERE] |Filters rows based on specified conditions. +|xref:reference:sql/sql-clauses/group-by.adoc[GROUP BY] |Groups rows sharing common values in specified columns for aggregation. +|xref:reference:sql/sql-clauses/having.adoc[HAVING] |Filters grouped rows based on aggregate conditions. +|xref:reference:sql/sql-clauses/order-by.adoc[ORDER BY] |Sorts the result set by specified columns in ascending or descending order. +|xref:reference:sql/sql-clauses/limit.adoc[LIMIT] |Restricts the number of rows returned by the query. +|xref:reference:sql/sql-clauses/offset.adoc[OFFSET] |Skips a specified number of rows before returning results. 
+|xref:reference:sql/sql-clauses/set-operations/index.adoc[SET OPERATIONS] |Combines or compares results from multiple `SELECT` statements, such as `UNION`, `INTERSECT`, and `EXCEPT`. +|xref:reference:sql/sql-clauses/with.adoc[WITH] |Creates temporary named result sets (Common Table Expressions) for reuse within queries. +|xref:reference:sql/sql-clauses/over-window.adoc[OVER] |Specifies the window over which window functions operate on subsets of data. +|=== diff --git a/modules/reference/pages/sql/sql-clauses/limit.adoc b/modules/reference/pages/sql/sql-clauses/limit.adoc new file mode 100644 index 000000000..2dc24eb8c --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/limit.adoc @@ -0,0 +1,206 @@ += LIMIT +:description: The LIMIT clause restricts the number of records returned by a SELECT statement. +:page-topic-type: reference + +`LIMIT` is an optional clause used with `SELECT` statements to retrieve records from one or more tables. It specifies the number of records a query returns after filtering the data. + +== Syntax + +The `LIMIT` clause has two syntax forms that produce identical results. The first is the standard PostgreSQL form: + +[source,sql] +---- +SELECT column_list +FROM table_name +ORDER BY sort_expression +LIMIT row_count +---- + +The second is the ANSI SQL form (`FETCH NEXT ... ROWS ONLY`): + +[source,sql] +---- +SELECT column_list +FROM table_name +ORDER BY sort_expression +FETCH NEXT row_count ROWS ONLY +---- + +Where: + +* `column_list`: The columns or calculations to retrieve. +* `table_name`: The tables to retrieve records from. +* `ORDER BY`: An expression used to order the results, either ascending (`ASC`) or descending (`DESC`). +* `LIMIT row_count`: The number of rows to return based on `row_count`. + +[NOTE] +==== +You can list more than one table in the `FROM` clause. +==== + +=== Special cases + +* If `row_count` is `NULL`, the query produces the same result as a query without a `LIMIT` clause. 
+* If `row_count` is zero, the statement returns an empty set. + +== Examples + +This example creates a `comporders` table and inserts values into it: + +[source,sql] +---- +CREATE TABLE comporders +( + order_id int, + cust_name text, + prod_name text, + prod_price float, + status text +); + +INSERT INTO comporders +VALUES +(1002, 'Mike', 'Lenovo IdeaPad Flex 5', 600, 'PAID'), +(1003, 'Sean', 'Acer Aspire 3', 450, 'PAID'), +(1004, 'Victor', 'Microsoft Surface Laptop Go 2', 500, 'PENDING'), +(1005, 'Lewis', 'Lenovo Duet 5i', 700, 'PAID'), +(1006, 'David', 'Acer Swift 3', 640, 'PAID'), +(1007, 'Meghan', 'Lenovo IdeaPad Duet 5 Chromebook', 750, 'PAID'), +(1008, 'Harry', 'Apple iPad Air', 449, 'PENDING'), +(1009, 'Steve', 'Microsoft Surface Go 3', 680, 'PENDING'), +(1010, 'Omar', 'HP Victus 16', 800,'PAID'); +---- + +To verify the inserted values, run: + +[source,sql] +---- +SELECT * FROM comporders; +---- + +The query returns: + +[source,sql] +---- ++-----------+------------+----------------------------------+-------------+----------+ +| order_id | cust_name | prod_name | prod_price | status | ++-----------+------------+----------------------------------+-------------+----------+ +| 1002 | Mike | Lenovo IdeaPad Flex 5 | 600 | PAID | +| 1003 | Sean | Acer Aspire 3 | 450 | PAID | +| 1004 | Victor | Microsoft Surface Laptop Go 2 | 500 | PENDING | +| 1005 | Lewis | Lenovo Duet 5i | 700 | PAID | +| 1006 | David | Acer Swift 3 | 640 | PAID | +| 1007 | Meghan | Lenovo IdeaPad Duet 5 Chromebook | 750 | PAID | +| 1008 | Harry | Apple iPad Air | 449 | PENDING | +| 1009 | Steve | Microsoft Surface Go 3 | 680 | PENDING | +| 1010 | Omar | HP Victus 16 | 800 | PAID | ++-----------+------------+----------------------------------+-------------+----------+ +---- + +=== LIMIT with ORDER BY expression + +This example uses the `LIMIT` clause to get the first four orders sorted by `order_id`: + +[source,sql] +---- +SELECT order_id, prod_name, prod_price +FROM comporders +ORDER BY order_id 
+LIMIT 4; +---- + +The query returns: + +[source,sql] +---- ++-----------+-------------------------------+-------------+ +| order_id | prod_name | prod_price | ++-----------+-------------------------------+-------------+ +| 1002 | Lenovo IdeaPad Flex 5 | 600 | +| 1003 | Acer Aspire 3 | 450 | +| 1004 | Microsoft Surface Laptop Go 2 | 500 | +| 1005 | Lenovo Duet 5i | 700 | ++-----------+-------------------------------+-------------+ +---- + +=== LIMIT with ASC or DESC + +You can use the `LIMIT` clause to select rows with the highest or lowest values from a table. + +. To get the five most expensive orders, sort by `prod_price` in descending order (`DESC`) and use `LIMIT` to return the first five rows: ++ +[source,sql] +---- +SELECT * FROM comporders +ORDER BY prod_price DESC +LIMIT 5; +---- ++ +The query returns: ++ +[source,sql] +---- ++-----------+------------+----------------------------------+-------------+----------+ +| order_id | cust_name | prod_name | prod_price | status | ++-----------+------------+----------------------------------+-------------+----------+ +| 1010 | Omar | HP Victus 16 | 800 | PAID | +| 1007 | Meghan | Lenovo IdeaPad Duet 5 Chromebook | 750 | PAID | +| 1005 | Lewis | Lenovo Duet 5i | 700 | PAID | +| 1009 | Steve | Microsoft Surface Go 3 | 680 | PENDING | +| 1006 | David | Acer Swift 3 | 640 | PAID | ++-----------+------------+----------------------------------+-------------+----------+ +---- + +. 
To get the five cheapest orders, sort by `prod_price` in ascending order (`ASC`) and use `LIMIT` to return the first five rows: ++ +[source,sql] +---- +SELECT * FROM comporders +ORDER BY prod_price ASC +LIMIT 5; +---- ++ +The query returns: ++ +[source,sql] +---- ++-----------+------------+----------------------------------+-------------+----------+ +| order_id | cust_name | prod_name | prod_price | status | ++-----------+------------+----------------------------------+-------------+----------+ +| 1008 | Harry | Apple iPad Air | 449 | PENDING | +| 1003 | Sean | Acer Aspire 3 | 450 | PAID | +| 1004 | Victor | Microsoft Surface Laptop Go 2 | 500 | PENDING | +| 1002 | Mike | Lenovo IdeaPad Flex 5 | 600 | PAID | +| 1006 | David | Acer Swift 3 | 640 | PAID | ++-----------+------------+----------------------------------+-------------+----------+ +---- + +=== LIMIT with OFFSET + +In this example, the `LIMIT` and `OFFSET` clauses get five orders: + +[source,sql] +---- +SELECT * FROM comporders +LIMIT 5 OFFSET 2; +---- + +The query returns: + +[source,sql] +---- ++-----------+------------+----------------------------------+-------------+----------+ +| order_id | cust_name | prod_name | prod_price | status | ++-----------+------------+----------------------------------+-------------+----------+ +| 1004 | Victor | Microsoft Surface Laptop Go 2 | 500 | PENDING | +| 1005 | Lewis | Lenovo Duet 5i | 700 | PAID | +| 1006 | David | Acer Swift 3 | 640 | PAID | +| 1007 | Meghan | Lenovo IdeaPad Duet 5 Chromebook | 750 | PAID | +| 1008 | Harry | Apple iPad Air | 449 | PENDING | ++-----------+------------+----------------------------------+-------------+----------+ +---- + +In this result: + +* Orders with `order_id` 1002 and 1003 are not displayed because the `OFFSET` value is `2`, so the first two rows are skipped. +* Orders with `order_id` 1009 and 1010 are not displayed because the `LIMIT` value is `5`, so only five rows are returned. 
diff --git a/modules/reference/pages/sql/sql-clauses/offset.adoc b/modules/reference/pages/sql/sql-clauses/offset.adoc new file mode 100644 index 000000000..b61f5d1df --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/offset.adoc @@ -0,0 +1,76 @@ += OFFSET +:description: The OFFSET clause skips a specified number of records from the result set. +:page-topic-type: reference + +The `OFFSET` clause skips a specified number of records from the result set. + +== Syntax + +[source,sql] +---- +SELECT columns +FROM table_name +OFFSET num; +---- + +Where: + +* `columns`: The columns to fetch. +* `table_name`: The table to fetch records from. +* `num`: The number of records to skip. + +== Examples + +The following example uses a `salaryemp` table. + +[source,sql] +---- +SELECT * FROM salaryemp ORDER BY emp_sal; +---- + +The query returns: + +[source,sql] +---- ++-----------+------------+----------------+-------------+ +| emp_id | emp_name | emp_div | emp_sal | ++-----------+------------+----------------+-------------+ +| 1008 | Harry | Operations | 4500 | +| 1005 | Lewis | Sales | 5500 | +| 1002 | Mike | Marketing | 6000 | +| 1003 | Sean | Marketing | 6500 | +| 1009 | Steve | Marketing | 6800 | +| 1004 | Victor | Finance | 7000 | +| 1007 | Meghan | Finance | 7500 | +| 1006 | David | Marketing | 8000 | +| 1010 | Omar | Finance | 8000 | +| 1011 | David | Sales | 8200 | ++-----------+------------+----------------+-------------+ +---- + +The following query skips the first three rows and returns the next five, ordered by salary: + +[source,sql] +---- +SELECT * FROM salaryemp +ORDER BY emp_sal +LIMIT 5 OFFSET 3; +---- + +* `OFFSET 3` skips the first three rows (`Harry`, `Lewis`, `Mike`). +* `LIMIT 5` returns the next five rows. 
+ +The query returns: + +[source,sql] +---- ++-----------+------------+----------------+-------------+ +| emp_id | emp_name | emp_div | emp_sal | ++-----------+------------+----------------+-------------+ +| 1003 | Sean | Marketing | 6500 | +| 1009 | Steve | Marketing | 6800 | +| 1004 | Victor | Finance | 7000 | +| 1007 | Meghan | Finance | 7500 | +| 1006 | David | Marketing | 8000 | ++-----------+------------+----------------+-------------+ +---- diff --git a/modules/reference/pages/sql/sql-clauses/order-by.adoc b/modules/reference/pages/sql/sql-clauses/order-by.adoc new file mode 100644 index 000000000..0305d3f97 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/order-by.adoc @@ -0,0 +1,279 @@ += ORDER BY +:description: The ORDER BY clause sorts the rows returned by a SELECT statement. +:page-topic-type: reference + +The `ORDER BY` clause sorts the rows returned by a `SELECT` statement. + +== Syntax + +[source,sql] +---- +SELECT columns +FROM table_name +ORDER BY sort_expression1 [ASC | DESC] [NULLS FIRST | NULLS LAST]; +---- + +=== Parameters + +* `columns`: The columns to retrieve. +* `table_name`: The table to retrieve records from. +* `ORDER BY`: The expression used to order the results. +* `ASC` or `DESC`: Optional. Specifies whether results are returned in ascending or descending order. Default is `ASC`. +* `NULLS FIRST` or `NULLS LAST`: Optional. Specifies where `NULL` values appear in the sort order. `NULLS FIRST` places `NULL` values before non-null values. `NULLS LAST` places `NULL` values after non-null values. The default is `NULLS LAST` for `ASC` order and `NULLS FIRST` for `DESC` order. + +== Examples + +The following examples use a table called `salaryemp`. 
To create the table, run the query: + +[source,sql] +---- +CREATE TABLE salaryemp +( + emp_id int, + emp_name text, + emp_div text, + emp_sal int +); + +INSERT INTO salaryemp +VALUES +(1002, 'Mike', 'Marketing', 6000), +(1003, 'Sean', 'Marketing', 6500), +(1004, 'Victor', 'Finance', 7000), +(1005, 'Lewis', 'Sales', 5500), +(1006, 'David', 'Marketing', 8000), +(1007, 'Meghan', 'Finance', 7500), +(1008, 'Harry', 'Operations', 4500), +(1009, 'Steve', 'Marketing', 6800), +(1010, 'Omar', 'Finance', 8000), +(1011, 'David', 'Sales', 8200); +---- + +To verify the inserted values, run: + +[source,sql] +---- +SELECT * FROM salaryemp; +---- + +The query returns: + +[source,sql] +---- ++-----------+------------+----------------+-------------+ +| emp_id | emp_name | emp_div | emp_sal | ++-----------+------------+----------------+-------------+ +| 1002 | Mike | Marketing | 6000 | +| 1003 | Sean | Marketing | 6500 | +| 1004 | Victor | Finance | 7000 | +| 1005 | Lewis | Sales | 5500 | +| 1006 | David | Marketing | 8000 | +| 1007 | Meghan | Finance | 7500 | +| 1008 | Harry | Operations | 4500 | +| 1009 | Steve | Marketing | 6800 | +| 1010 | Omar | Finance | 8000 | +| 1011 | David | Sales | 8200 | ++-----------+------------+----------------+-------------+ +---- + +=== ORDER BY in ascending order + +This example uses the `ORDER BY` clause to sort employees by their division: + +[source,sql] +---- +SELECT emp_name, emp_div +FROM salaryemp +ORDER BY emp_div; +---- + +The query returns: + +[source,sql] +---- ++------------+----------------+ +| emp_name | emp_div | ++------------+----------------+ +| Victor | Finance | +| Omar | Finance | +| Meghan | Finance | +| Mike | Marketing | +| Sean | Marketing | +| David | Marketing | +| Steve | Marketing | +| Harry | Operations | +| Lewis | Sales | +| David | Sales | ++------------+----------------+ +---- + +=== ORDER BY in descending order + +The following statement selects all records from the `salaryemp` table and sorts them by `emp_sal` in 
descending order: + +[source,sql] +---- +SELECT * FROM salaryemp +ORDER BY emp_sal DESC; +---- + +The query returns: + +[source,sql] +---- ++-----------+------------+----------------+-------------+ +| emp_id | emp_name | emp_div | emp_sal | ++-----------+------------+----------------+-------------+ +| 1011 | David | Sales | 8200 | +| 1006 | David | Marketing | 8000 | +| 1010 | Omar | Finance | 8000 | +| 1007 | Meghan | Finance | 7500 | +| 1004 | Victor | Finance | 7000 | +| 1009 | Steve | Marketing | 6800 | +| 1003 | Sean | Marketing | 6500 | +| 1002 | Mike | Marketing | 6000 | +| 1005 | Lewis | Sales | 5500 | +| 1008 | Harry | Operations | 4500 | ++-----------+------------+----------------+-------------+ +---- + +=== ORDER BY with both ASC and DESC parameters + +The following statement selects all records from the `salaryemp` table and sorts the rows by `emp_sal` in ascending order and `emp_div` in descending order: + +[source,sql] +---- +SELECT * FROM salaryemp +ORDER BY emp_sal ASC, emp_div DESC; +---- + +The query returns: + +[source,sql] +---- ++-----------+------------+----------------+-------------+ +| emp_id | emp_name | emp_div | emp_sal | ++-----------+------------+----------------+-------------+ +| 1008 | Harry | Operations | 4500 | +| 1005 | Lewis | Sales | 5500 | +| 1002 | Mike | Marketing | 6000 | +| 1003 | Sean | Marketing | 6500 | +| 1009 | Steve | Marketing | 6800 | +| 1004 | Victor | Finance | 7000 | +| 1007 | Meghan | Finance | 7500 | +| 1006 | David | Marketing | 8000 | +| 1010 | Omar | Finance | 8000 | +| 1011 | David | Sales | 8200 | ++-----------+------------+----------------+-------------+ +---- + +=== ORDER BY with TEXT data types + +In this example, two small tables are created with `TEXT` data: + +[source,sql] +---- +CREATE TABLE strings +( + column1 text +); + +INSERT INTO strings +VALUES ('A'), ('B'), ('a'), ('b'); + +CREATE TABLE texts +( + column1 TEXT +); + +INSERT INTO texts +VALUES ('A'), ('B'), ('a'), ('b'); +---- + +When using
the `ORDER BY` clause with these data types, records with uppercase letters are sorted lexicographically first, followed by records with lowercase letters. + +[source,sql] +---- +SELECT * FROM strings ORDER BY column1; +SELECT * FROM texts ORDER BY column1; +---- + +The query returns: + +[source,sql] +---- + column1 +--------- + A + B + a + b +---- + +=== ORDER BY with INTERVAL data type + +For this example, create a new table called `interval_data`: + +[source,sql] +---- +CREATE TABLE interval_data ( + duration INTERVAL +); + +INSERT INTO interval_data (duration) +VALUES + (INTERVAL '1 month 30 days 20 hours'), + (INTERVAL '2 months 20 hours'), + (INTERVAL '1 month 30 days 19 hours'), + (INTERVAL '2 months 1 hours'); +---- + +`ORDER BY` on an `INTERVAL` column sorts values by their total duration. For example, `1 month 30 days 20 hours` is greater than `2 months 1 hour` because `1 month` equals `30 days`, making the first interval equivalent to `60 days 20 hours` versus `60 days 1 hour`. + +[source,sql] +---- +SELECT * FROM interval_data ORDER BY duration; +---- + +The query returns: + +[source,sql] +---- + duration +------------------------ + 2 mons 01:00:00 + 1 mon 30 days 19:00:00 + 1 mon 30 days 20:00:00 + 2 mons 20:00:00 +---- + +This total-duration sorting also applies when mixing `hours` and `days`. 
For example, `24 hours 5 minutes` and `1 day 5 minutes` represent the same total duration: + +[source,sql] +---- +INSERT INTO interval_data (duration) +VALUES + (INTERVAL '24 hours 5 minutes'), + (INTERVAL '1 day 5 minutes'), + (INTERVAL '1 day 2 minutes'); +---- + +[source,sql] +---- +SELECT * FROM interval_data ORDER BY duration; +---- + +The query returns: + +[source,sql] +---- + duration +------------------------ + 1 day 00:02:00 + 24:05:00 + 1 day 00:05:00 + 2 mons 01:00:00 + 1 mon 30 days 19:00:00 + 1 mon 30 days 20:00:00 + 2 mons 20:00:00 +---- diff --git a/modules/reference/pages/sql/sql-clauses/over-window.adoc b/modules/reference/pages/sql/sql-clauses/over-window.adoc new file mode 100644 index 000000000..464df7078 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/over-window.adoc @@ -0,0 +1,163 @@ += OVER and WINDOW +:description: Window functions use the OVER and WINDOW clauses to define the set of rows over which the function operates. +:page-topic-type: reference + +Window functions use a set of clauses to define the rows they operate over. Some of these clauses are mandatory and others are optional. + +== OVER clause + +The `OVER` clause defines a window, or user-specified set of rows, within a query result set. It is mandatory for window functions and differentiates them from other SQL functions. + +=== Syntax + +[source,sql] +---- +OVER (PARTITION BY rows1 ORDER BY rows2) +---- + +The `PARTITION BY` clause is a list of expressions interpreted in much the same way as the elements of a `GROUP BY` clause, except that they are always simple expressions and never the name or number of an output column. These expressions can also contain aggregate function calls, which are not allowed in a regular `GROUP BY` clause (windowing occurs after grouping and aggregation). + +`[ PARTITION BY expression [, ...] 
]` (optional window partition) + +The `ORDER BY` clause used in the `OVER` clause is a list of expressions interpreted in much the same way as the elements of a statement-level `ORDER BY` clause, except that the expressions are always taken as simple expressions and never the name or number of an output column. + +`[ ORDER BY expression [ ASC | DESC | USING operator ] [ NULLS { FIRST | LAST } ] [, ...] ]` (optional window ordering) + +== WINDOW clause + +The optional `WINDOW` clause defines one or more named window specifications, as a `window_name` and `window_definition` pair. + +=== Syntax + +[source,sql] +---- +WINDOW window_name AS (window_definition) [, ...] +---- + +`window_name` is a name that can be referenced from `OVER` clauses or subsequent window definitions. Note the following: + +* The `window_definition` may use an `existing_window_name` to refer to a previous `window_definition` in the `WINDOW` clause, but the previous `window_definition` must not specify a frame clause. +* The `window_definition` copies the `PARTITION BY` and `ORDER BY` clauses from the previous `window_definition`, but it cannot specify its own `PARTITION BY` clause. It can specify an `ORDER BY` clause if the previous `window_definition` does not have one. + +`[ existing_window_name ] [ PARTITION BY clause ] [ ORDER BY clause ] [ frame clause ]` (all arguments are optional) + +[NOTE] +==== +A `window_definition` without arguments defines a window with all rows, without partition or ordering. +==== + +The frame clause defines the window frame for window functions that depend on the frame (not all do). The window frame is a set of related rows for each row of the query (called the current row). + +* `{ RANGE | ROWS | GROUPS } frame_start [ frame_exclusion ]` +* `{ RANGE | ROWS | GROUPS } BETWEEN frame_start AND frame_end [ frame_exclusion ]` + +[NOTE] +==== +Redpanda SQL supports `ROWS` and `RANGE` frame modes. The `GROUPS` frame mode and `frame_exclusion` are not supported. 
+==== + +Note the following: + +* `frame_start` and `frame_end` can be one of: `UNBOUNDED PRECEDING`, `offset PRECEDING`, `CURRENT ROW`, `offset FOLLOWING`, `UNBOUNDED FOLLOWING`. +* If `frame_end` is omitted, it defaults to `CURRENT ROW`. The following restrictions apply: +** `frame_start` cannot be `UNBOUNDED FOLLOWING`. +** `frame_end` cannot be `UNBOUNDED PRECEDING`. +** `frame_end` cannot appear earlier in the list of `frame_start` and `frame_end` options than the `frame_start` choice does. + +In `ROWS` mode, `CURRENT ROW` means that the frame starts or ends with the current row. The offset is an integer indicating that the frame starts or ends that many rows before or after the current row. + +[NOTE] +==== +The `ROWS` mode can produce unpredictable results if the `ORDER BY` ordering does not order the rows uniquely. +==== + +== Examples + +For the examples in this section, create the `winsales` table: + +[source,sql] +---- +CREATE TABLE winsales( + salesid int, + dateid date, + sellerid int, + buyerid text, + qty int, + qty_shipped int); +INSERT INTO winsales VALUES + (30001, '8/2/2003', 3, 'b', 10, 10), + (10001, '12/24/2003', 1, 'c', 10, 10), + (10005, '12/24/2003', 1, 'a', 30, null), + (40001, '1/9/2004', 4, 'a', 40, null), + (10006, '1/18/2004', 1, 'c', 10, null), + (20001, '2/12/2004', 2, 'b', 20, 20), + (40005, '2/12/2004', 4, 'a', 10, 10), + (20002, '2/16/2004', 2, 'c', 20, 20), + (30003, '4/18/2004', 3, 'b', 15, null), + (30004, '4/18/2004', 3, 'b', 20, null), + (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== OVER clause with PARTITION BY and ORDER BY + +This example uses the `OVER` clause with `PARTITION BY` to total `qty` for each seller. The statement-level `ORDER BY` only sorts the output rows; it is not part of the window definition: + +[source,sql] +---- +SELECT *, + SUM(qty) OVER (PARTITION BY sellerid) AS seller_qty +FROM winsales +ORDER BY sellerid, salesid; +---- + +The query returns: + +[source,sql] +---- + salesid | dateid | sellerid | buyerid | qty | qty_shipped | seller_qty
+---------+------------+----------+---------+-----+-------------+------------ + 10001 | 2003-12-24 | 1 | c | 10 | 10 | 50 + 10005 | 2003-12-24 | 1 | a | 30 | | 50 + 10006 | 2004-01-18 | 1 | c | 10 | | 50 + 20001 | 2004-02-12 | 2 | b | 20 | 20 | 40 + 20002 | 2004-02-16 | 2 | c | 20 | 20 | 40 + 30001 | 2003-08-02 | 3 | b | 10 | 10 | 75 + 30003 | 2004-04-18 | 3 | b | 15 | | 75 + 30004 | 2004-04-18 | 3 | b | 20 | | 75 + 30007 | 2004-09-07 | 3 | c | 30 | | 75 + 40001 | 2004-01-09 | 4 | a | 40 | | 50 + 40005 | 2004-02-12 | 4 | a | 10 | 10 | 50 +(11 rows) +---- + +=== OVER clause with named window + +This example uses the `OVER` clause with a named window defined in the `WINDOW` clause: + +[source,sql] +---- +SELECT *, + SUM(qty) OVER seller AS seller_qty +FROM winsales WINDOW seller AS (PARTITION BY sellerid) +ORDER BY sellerid, salesid; +---- + +The query returns: + +[source,sql] +---- + salesid | dateid | sellerid | buyerid | qty | qty_shipped | seller_qty +---------+------------+----------+---------+-----+-------------+------------ + 10001 | 2003-12-24 | 1 | c | 10 | 10 | 50 + 10005 | 2003-12-24 | 1 | a | 30 | | 50 + 10006 | 2004-01-18 | 1 | c | 10 | | 50 + 20001 | 2004-02-12 | 2 | b | 20 | 20 | 40 + 20002 | 2004-02-16 | 2 | c | 20 | 20 | 40 + 30001 | 2003-08-02 | 3 | b | 10 | 10 | 75 + 30003 | 2004-04-18 | 3 | b | 15 | | 75 + 30004 | 2004-04-18 | 3 | b | 20 | | 75 + 30007 | 2004-09-07 | 3 | c | 30 | | 75 + 40001 | 2004-01-09 | 4 | a | 40 | | 50 + 40005 | 2004-02-12 | 4 | a | 10 | 10 | 50 +(11 rows) +---- diff --git a/modules/reference/pages/sql/sql-clauses/set-operations/except.adoc b/modules/reference/pages/sql/sql-clauses/set-operations/except.adoc new file mode 100644 index 000000000..b7e9d476c --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/set-operations/except.adoc @@ -0,0 +1,249 @@ += EXCEPT +:description: The EXCEPT combines the result sets of two or more tables and retrieves rows specific to the first SELECT statement but not present in the 
subsequent ones. +:page-topic-type: reference + +`EXCEPT` combines the result sets of two or more tables and retrieves rows specific to the first `SELECT` statement but not present in the subsequent ones. + +== Syntax + +[source,sql] +---- +SELECT value1, value2, ... value_n +FROM table1 +EXCEPT +SELECT value1, value2, ... value_n +FROM table2; +---- + +Where: + +* `value1, value2, ... value_n`: The columns to retrieve. +* `table1, table2`: The tables to retrieve records from. + +== Examples + +Suppose you have two tables: `vehicles_2021` and `vehicles_2022`. The goal is to find the vehicle that was present in 2021 but is not present in 2022: + +[source,sql] +---- +CREATE TABLE vehicles_2021 ( + vhc_id INT, + vhc_name TEXT +); + +CREATE TABLE vehicles_2022 ( + vhc_id INT, + vhc_name TEXT +); + +INSERT INTO vehicles_2021 VALUES +(1, 'Truck'), +(2, 'Car'), +(3, 'Motorcycle'); + +INSERT INTO vehicles_2022 VALUES +(2, 'Car'), +(3, 'Bus'), +(4, 'Motorcycle'); +---- + +View the tables: + +[source,sql] +---- +SELECT * FROM vehicles_2021; +SELECT * FROM vehicles_2022; +---- + +[source,sql] +---- +vhc_id | vhc_name +--------+------------ + 1 | Truck + 2 | Car + 3 | Motorcycle + + vhc_id | vhc_name +--------+------------ + 2 | Car + 3 | Bus + 4 | Motorcycle +---- + +Use `EXCEPT` to find vehicle names present in the first table but not in the second: + +[source,sql] +---- +SELECT vhc_name FROM vehicles_2021 +EXCEPT +SELECT vhc_name FROM vehicles_2022; +---- + +The query returns vehicle names that exist in `vehicles_2021` but not in `vehicles_2022`: + +[source,sql] +---- +vhc_name +---------- + Truck +---- + +Only `Truck` is present in the first table but not the second. + +== EXCEPT ALL + +`EXCEPT ALL` finds rows specific to the first `SELECT` statement while preserving duplicate entries. + +=== Syntax + +[source,sql] +---- +SELECT value1, value2, ... value_n +FROM table1 +EXCEPT ALL +SELECT value1, value2, ... value_n +FROM table2; +---- + +Where: + +* `value1, value2, ... 
value_n`: The columns to retrieve. +* `table1, table2`: The tables to retrieve records from. + +=== Customers in only one marketplace + +To identify customers who have purchased products from one marketplace but not from another, create the tables and populate them with relevant data: + +[source,sql] +---- +CREATE TABLE marketplace1_transactions ( + customer_id INT, + product_id INT, + amount FLOAT +); + +CREATE TABLE marketplace2_transactions ( + customer_id INT, + product_id INT, + amount FLOAT +); + +INSERT INTO marketplace1_transactions VALUES +(101, 1, 100.00), +(102, 2, 150.00), +(103, 3, 200.00), +(104, 1, 120.00); + +INSERT INTO marketplace2_transactions VALUES +(102, 3, 180.00), +(103, 2, 160.00), +(105, 4, 90.00), +(106, 1, 110.00); +---- + +View the tables: + +[source,sql] +---- +SELECT * FROM marketplace1_transactions; +SELECT * FROM marketplace2_transactions; +---- + +[source,sql] +---- +customer_id | product_id | amount +-------------+------------+-------- + 101 | 1 | 100 + 102 | 2 | 150 + 103 | 3 | 200 + 104 | 1 | 120 + + customer_id | product_id | amount +-------------+------------+-------- + 102 | 3 | 180 + 103 | 2 | 160 + 105 | 4 | 90 + 106 | 1 | 110 +---- + +Use `EXCEPT ALL` to find customers who have purchased products from one marketplace but not from the other: + +[source,sql] +---- +SELECT customer_id FROM marketplace1_transactions +EXCEPT ALL +SELECT customer_id FROM marketplace2_transactions; +---- + +The query returns the `customer_id` values that appear in the first marketplace but not in the second: + +[source,sql] +---- +customer_id +------------- + 104 + 101 +---- + +=== Compare arrays with duplicates + +Create two tables, `left_array_values` and `right_array_values`, to hold sets of values: + +[source,sql] +---- +CREATE TABLE left_array_values ( + value INT +); + +CREATE TABLE right_array_values ( + value INT +); + +INSERT INTO left_array_values VALUES (1), (1), (3); +INSERT INTO right_array_values VALUES (1), (2); +---- + +View the 
contents of the two tables before performing the comparison: + +[source,sql] +---- +SELECT * FROM left_array_values; +SELECT * FROM right_array_values; +---- + +The tables contain: + +[source,sql] +---- +value +------- + 1 + 1 + 3 + + value +------- + 1 + 2 +---- + +Use `EXCEPT ALL` to compare the values, focusing on unique elements while retaining duplicate entries: + +[source,sql] +---- +SELECT value +FROM left_array_values +EXCEPT ALL +SELECT value +FROM right_array_values; +---- + +`EXCEPT ALL` compares elements pairwise, so both `1` and `3` appear in the final result: + +[source,sql] +---- +value +------- + 3 + 1 +---- diff --git a/modules/reference/pages/sql/sql-clauses/set-operations/index.adoc b/modules/reference/pages/sql/sql-clauses/set-operations/index.adoc new file mode 100644 index 000000000..0e90e8001 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/set-operations/index.adoc @@ -0,0 +1,14 @@ += Set operations +:description: Set operations combine, compare, or contrast result sets from multiple SELECT statements. +:page-topic-type: reference + +Set operations combine, compare, or contrast result sets from multiple `SELECT` statements. Redpanda SQL supports the following operations: + +* xref:reference:sql/sql-clauses/set-operations/union.adoc[Union]: Combines two or more sets to create a new set containing all unique elements from the input sets. +* xref:reference:sql/sql-clauses/set-operations/intersect.adoc[Intersect]: Yields a new set with elements common to all input sets. +* xref:reference:sql/sql-clauses/set-operations/except.adoc[Except]: Generates a set containing elements from the first set that are not present in the second set. + +[NOTE] +==== +For all set operations, the data types of corresponding columns in the `SELECT` queries must be compatible. The order of columns is flexible as long as the columns in consecutive places are pairwise compatible. 
For example, `SELECT col1, col2 FROM table1 UNION SELECT col2, col1 FROM table2`. +==== diff --git a/modules/reference/pages/sql/sql-clauses/set-operations/intersect.adoc b/modules/reference/pages/sql/sql-clauses/set-operations/intersect.adoc new file mode 100644 index 000000000..b2b8eafa5 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/set-operations/intersect.adoc @@ -0,0 +1,207 @@ += INTERSECT +:description: The INTERSECT combines the result sets of two or more SELECT statements, retrieving only the common rows between them. +:page-topic-type: reference + +`INTERSECT` combines the result sets of two or more `SELECT` statements, retrieving only the common rows between them. Unlike `UNION`, which combines all rows and removes duplicates, `INTERSECT` returns rows that appear in all `SELECT` statements. + +== Syntax + +[source,sql] +---- +SELECT value1, value2, ... value_n +FROM table1 +INTERSECT +SELECT value1, value2, ... value_n +FROM table2; +---- + +Where: + +* `value1, value2, ... value_n`: The columns to retrieve. You can also use `SELECT * FROM` to retrieve all columns. +* `table1, table2`: The tables to retrieve records from. + +== Examples + +Suppose you have two tables: `customers_old` and `customers_new`, containing customer data for different periods. 
To find the customers who are present in both tables: + +[source,sql] +---- +CREATE TABLE customers_old ( + customer_id INT, + customer_name TEXT +); + +CREATE TABLE customers_new ( + customer_id INT, + customer_name TEXT +); + +INSERT INTO customers_old VALUES +(1, 'Alice'), +(2, 'Bob'), +(3, 'Charlie'); + +INSERT INTO customers_new VALUES +(2, 'Bob'), +(3, 'Charlie'), +(4, 'David'); +---- + +View the inserted values: + +[source,sql] +---- +SELECT * FROM customers_old; +SELECT * FROM customers_new; +---- + +[source,sql] +---- +customer_id | customer_name +-------------+--------------- + 1 | Alice + 2 | Bob + 3 | Charlie + + customer_id | customer_name +-------------+--------------- + 2 | Bob + 3 | Charlie + 4 | David +---- + +Combine common customers using `INTERSECT`: + +[source,sql] +---- +SELECT customer_name FROM customers_old +INTERSECT +SELECT customer_name FROM customers_new; +---- + +The query returns only the names that appear in both tables: + +[source,sql] +---- +customer_name +--------------- + Bob + Charlie +---- + +Only `Bob` and `Charlie` appear in both tables. + +== INTERSECT ALL + +`INTERSECT ALL` retrieves all common rows between two or more `SELECT` statements without removing duplicates. If a row appears multiple times in every `SELECT` statement, it is included in the final result set as many times as it appears in the statement where it occurs least often. + +=== Syntax + +[source,sql] +---- +SELECT value1, value2, ... value_n +FROM table1 +INTERSECT ALL +SELECT value1, value2, ... value_n +FROM table2; +---- + +Where: + +* `value1, value2, ... value_n`: The columns to retrieve. You can also retrieve all columns using `SELECT * FROM`. +* `table1, table2`: The tables to retrieve records from. + +=== Example + +Create three tables of products from different years.
To find the common products among all three tables, including duplicates: + +[source,sql] +---- +CREATE TABLE products_electronics2021 ( + product_id INT, + product_name TEXT +); + +CREATE TABLE products_electronics2022 ( + product_id INT, + product_name TEXT +); + +CREATE TABLE products_electronics2023 ( + product_id INT, + product_name TEXT +); + +INSERT INTO products_electronics2021 VALUES +(1, 'Laptop'), +(2, 'Phone'), +(3, 'Tablet'), +(4, 'Headphones'); + +INSERT INTO products_electronics2022 VALUES +(2, 'TV'), +(3, 'Printer'), +(4, 'Monitor'), +(5, 'Phone'); + +INSERT INTO products_electronics2023 VALUES +(3, 'Laptop'), +(4, 'Phone'), +(5, 'Oven'), +(6, 'AC'); +---- + +View the tables: + +[source,sql] +---- +SELECT * FROM products_electronics2021; +SELECT * FROM products_electronics2022; +SELECT * FROM products_electronics2023; +---- + +[source,sql] +---- +product_id | product_name +------------+-------------- + 1 | Laptop + 2 | Phone + 3 | Tablet + 4 | Headphones + + product_id | product_name +------------+-------------- + 2 | TV + 3 | Printer + 4 | Monitor + 5 | Phone + + product_id | product_name +------------+-------------- + 3 | Laptop + 4 | Phone + 5 | Oven + 6 | AC +---- + +Combine common products from all three tables using `INTERSECT ALL`: + +[source,sql] +---- +SELECT product_name FROM products_electronics2021 +INTERSECT ALL +SELECT product_name FROM products_electronics2022 +INTERSECT ALL +SELECT product_name FROM products_electronics2023; +---- + +The query returns the products common to all three tables, including duplicates: + +[source,sql] +---- +product_name +-------------- + Phone +---- + +Only `Phone` appears across all three tables. 
diff --git a/modules/reference/pages/sql/sql-clauses/set-operations/union.adoc b/modules/reference/pages/sql/sql-clauses/set-operations/union.adoc new file mode 100644 index 000000000..4108ade3e --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/set-operations/union.adoc @@ -0,0 +1,188 @@ += UNION +:description: The UNION combines the result sets of two or more SELECT statements, removing duplicate rows between the tables. +:page-topic-type: reference + +`UNION` combines the result sets of two or more `SELECT` statements, removing duplicate rows between the tables. + +== Syntax + +[source,sql] +---- +SELECT value1, value2, ... value_n +FROM table1 +UNION +SELECT value1, value2, ... value_n +FROM table2; +---- + +Where: + +* `value1, value2, ... value_n`: The columns to retrieve. You can also retrieve all columns using `SELECT * FROM`. +* `table1, table2`: The tables to retrieve records from. + +== Examples + +Suppose there is a table called `employees` and another table called `contractors`. 
The goal is to retrieve a combined list of names from both tables, excluding duplicates: + +[source,sql] +---- +CREATE TABLE employees ( + emp_id INT, + emp_name TEXT +); + +CREATE TABLE contractors ( + contractor_id INT, + contractor_name TEXT +); + +INSERT INTO employees VALUES +(1, 'John'), +(2, 'Alice'), +(3, 'Bob'); + +INSERT INTO contractors VALUES +(101, 'Alice'), +(102, 'Eve'), +(103, 'Tom'); +---- + +Verify the inserted values: + +[source,sql] +---- +SELECT * FROM employees; +SELECT * FROM contractors; +---- + +[source,sql] +---- +emp_id | emp_name +--------+---------- + 1 | John + 2 | Alice + 3 | Bob + + contractor_id | contractor_name +---------------+----------------- + 101 | Alice + 102 | Eve + 103 | Tom +---- + +Combine the values from the tables: + +[source,sql] +---- +SELECT emp_name FROM employees +UNION +SELECT contractor_name FROM contractors; +---- + +The query returns the values from both tables without duplicates: + +[source,sql] +---- +emp_name +---------- + Alice + Bob + Eve + John + Tom +---- + +The duplicate name `Alice` appears only once in the output. + +== UNION ALL + +`UNION ALL` combines the result sets of two or more `SELECT` statements, returning all rows from the queries without removing duplicates. + +=== Syntax + +[source,sql] +---- +SELECT value1, value2, ... value_n +FROM table1 +UNION ALL +SELECT value1, value2, ... value_n +FROM table2; +---- + +Where: + +* `value1, value2, ... value_n`: The columns to retrieve. You can also retrieve all columns using `SELECT * FROM`. +* `table1, table2`: The tables to retrieve records from. + +=== Example + +Suppose you have two separate tables, `sales_2022` and `sales_2023`, containing sales data for different years. 
To combine the sales data from both tables without removing duplicates: + +[source,sql] +---- +CREATE TABLE sales_2022 ( + transaction_id INT, + product_name TEXT, + sale_amount INT +); + +CREATE TABLE sales_2023 ( + transaction_id INT, + product_name TEXT, + sale_amount INT +); + +INSERT INTO sales_2022 VALUES +(1, 'Product A', 1000), +(2, 'Product B', 500), +(3, 'Product C', 750); + +INSERT INTO sales_2023 VALUES +(4, 'Product A', 1200), +(5, 'Product D', 800), +(6, 'Product E', 950); +---- + +Verify the inserted values: + +[source,sql] +---- +SELECT * FROM sales_2022; +SELECT * FROM sales_2023; +---- + +[source,sql] +---- +transaction_id | product_name | sale_amount +----------------+--------------+------------- + 1 | Product A | 1000 + 2 | Product B | 500 + 3 | Product C | 750 + + transaction_id | product_name | sale_amount +----------------+--------------+------------- + 4 | Product A | 1200 + 5 | Product D | 800 + 6 | Product E | 950 +---- + +Combine all values from the tables using `UNION ALL`: + +[source,sql] +---- +SELECT product_name, sale_amount FROM sales_2022 UNION ALL SELECT product_name, sale_amount FROM sales_2023; +---- + +The query returns all rows from the first table followed by all rows from the second table, including duplicates: + +[source,sql] +---- +product_name | sale_amount +--------------+------------- + Product A | 1000 + Product B | 500 + Product C | 750 + Product A | 1200 + Product D | 800 + Product E | 950 +---- diff --git a/modules/reference/pages/sql/sql-clauses/where.adoc b/modules/reference/pages/sql/sql-clauses/where.adoc new file mode 100644 index 000000000..d8e99c089 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/where.adoc @@ -0,0 +1,239 @@ += WHERE +:description: The WHERE clause filters records based on specified conditions, returning only the rows that meet the conditions. +:page-topic-type: reference + +The `WHERE` clause filters records based on specified conditions. 
It excludes records that do not meet the conditions and returns only the rows that match. + +== Syntax + +[source,sql] +---- +SELECT column1, column2, ... +FROM table_name +WHERE [condition] +---- + +This syntax includes the following elements: + +* `SELECT column1, column2, ...` defines the columns where the records are displayed. +* `FROM table_name` sets the table where the records are taken from. +* `WHERE [condition]` specifies the search condition using comparison or logical operators (for example, `>`, `=`, `LIKE`). + +[TIP] +==== +The query starts with the `FROM` clause, then evaluates the `WHERE` condition, and finally runs `SELECT` on the specified columns. +==== + +== Examples + +Assume there is a table called `salary` with the following records: + +[source,sql] +---- +CREATE TABLE salary ( + empid int, + empname text, + empdept text, + empaddress text, + empsalary int +); +INSERT INTO salary + (empid, empname, empdept, empaddress, empsalary) +VALUES + (2001,'Paul','HR', 'California', null ), + (2002,'Brandon','Product', 'Norway', 15000), + (2003,'Bradley','Marketing', 'Texas', null), + (2004,'Lisa','Marketing', 'Houston', 10000), + (2005,'Emily','Marketing', 'Texas', 20000), + (2006,'Bobby','Finance', 'Seattle', 20000), + (2007,'Parker','Project', 'Texas', 45000); +---- + +[source,sql] +---- +SELECT * FROM salary; +---- + +This returns: + +[source,sql] +---- ++--------+-----------+------------+-------------+------------+ +| empid | empname | empdept | empaddress | empsalary | ++--------+-----------+------------+-------------+------------+ +| 2001 | Paul | HR | California | null | +| 2002 | Brandon | Product | Norway | 15000 | +| 2003 | Bradley | Marketing | Texas | null | +| 2004 | Lisa | Marketing | Houston | 10000 | +| 2005 | Emily | Marketing | Texas | 20000 | +| 2006 | Bobby | Finance | Seattle | 20000 | +| 2007 | Parker | Project | Texas | 45000 | ++--------+-----------+------------+-------------+------------+ +---- + +=== WHERE clause with `=` 
operator + +The following example uses the `=` (equal) operator to look up the employees who work in the Marketing department: + +[source,sql] +---- +SELECT empname, empdept +FROM salary +WHERE empdept = 'Marketing'; +---- + +The query returns: + +[source,sql] +---- ++------------+-------------+ +| empname | empdept | ++------------+-------------+ +| Bradley | Marketing | +| Emily | Marketing | +| Lisa | Marketing | ++------------+-------------+ +---- + +[WARNING] +==== +The value defined in the `WHERE` clause condition is case-sensitive. Specify the exact value to match. +==== + +=== WHERE clause with `!=` operator + +The following example uses the `!=` (not equal) operator to look up employees who do not live in Texas: + +[source,sql] +---- +SELECT empname, empdept, empaddress +FROM salary +WHERE empaddress != 'Texas'; +---- + +[NOTE] +==== +You can also use the `<>` operator for "not equal". +==== + +The query returns: + +[source,sql] +---- ++------------+------------+--------------+ +| empname | empdept | empaddress | ++------------+------------+--------------+ +| Paul | HR | California | +| Brandon | Product | Norway | +| Lisa | Marketing | Houston | +| Bobby | Finance | Seattle | ++------------+------------+--------------+ +---- + +=== WHERE clause with `>` operator + +The following example uses the `>` (greater than) operator to find employees with a salary above 20000: + +[source,sql] +---- +SELECT empname, empdept, empsalary +FROM salary +WHERE empsalary > 20000; +---- + +[NOTE] +==== +You can use the `<` operator for a "less than" condition. +==== + +The query returns: + +[source,sql] +---- ++------------+------------+-------------+ +| empname | empdept | empsalary | ++------------+------------+-------------+ +| Parker | Project | 45000 | ++------------+------------+-------------+ +---- + +Only Parker has a salary greater than 20000. 
+ +=== WHERE clause with `<=` operator + +The following example uses the `<=` (less than or equal to) operator to find employees with a salary less than or equal to 15000: + +[source,sql] +---- +SELECT empname, empdept, empsalary +FROM salary +WHERE empsalary <= '15000'; +---- + +[NOTE] +==== +You can use the `>=` operator for a "greater than or equal to" condition. +==== + +The query returns: + +[source,sql] +---- ++------------+------------+-------------+ +| empname | empdept | empsalary | ++------------+------------+-------------+ +| Brandon | Product | 15000 | +| Lisa | Marketing | 10000 | ++------------+------------+-------------+ +---- + +Brandon has a salary equal to 15000 and Lisa has a salary less than 15000. + +=== WHERE clause with `LIKE` operator + +The following example uses the `LIKE` operator to retrieve employees whose first name starts with `Br`: + +[source,sql] +---- +SELECT * FROM salary +WHERE empname LIKE 'Br%'; +---- + +[NOTE] +==== +To match a string at the end instead of the start, use `LIKE '%string'`. 
+==== + +The query returns `Brandon` and `Bradley`: + +[source,sql] +---- ++---------+------------+-------------+--------------+------------+ +| empid | empname | empdept | empaddress | empsalary | ++---------+------------+-------------+--------------+------------+ +| 2002 | Brandon | Product | Norway | 15000 | +| 2003 | Bradley | Marketing | Texas | null | ++---------+------------+-------------+--------------+------------+ +---- + +=== WHERE clause with `IS NULL` operator + +The following example uses the `IS NULL` operator to find employees who do not have a salary value: + +[source,sql] +---- +SELECT * FROM salary +WHERE empsalary IS NULL; +---- + +The query returns: + +[source,sql] +---- ++---------+------------+-------------+--------------+------------+ +| empid | empname | empdept | empaddress | empsalary | ++---------+------------+-------------+--------------+------------+ +| 2001 | Paul | HR | California | null | +| 2003 | Bradley | Marketing | Texas | null | ++---------+------------+-------------+--------------+------------+ +---- diff --git a/modules/reference/pages/sql/sql-clauses/with.adoc b/modules/reference/pages/sql/sql-clauses/with.adoc new file mode 100644 index 000000000..17e060dcb --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/with.adoc @@ -0,0 +1,91 @@ += WITH +:description: The WITH clause defines auxiliary statements (Common Table Expressions) for use within a larger query. +:page-topic-type: reference + +The `WITH` clause defines auxiliary statements (referred to by their alias names) for use within a larger query. These auxiliary statements are also known as Common Table Expressions (CTEs). + +== Syntax + +The `WITH` clause precedes the primary statement it is attached to and contains a list of auxiliary statements with corresponding aliases. + +[source,sql] +---- +WITH [with_statement_alias AS (with_statement_body)]+ primary_statement; +---- + +* `primary_statement`: A `SELECT`, `INSERT`, `UPDATE`, or `DELETE` statement.
+* `with_statement_body`: A `SELECT` statement. It can refer to aliases defined earlier in the query. + +== Semantics + +Redpanda SQL only supports non-materialized CTEs. Each auxiliary query alias is replaced with its corresponding body at the early stages of query processing. The following query: + +[source,sql] +---- +WITH a AS (SELECT 77), b AS (SELECT * FROM a) SELECT * FROM b +---- + +is effectively turned into: + +[source,sql] +---- +SELECT * FROM (SELECT * FROM (SELECT 77) AS a) AS b +---- + +The auxiliary query gets the same alias (`AS b` part) as in the `WITH` clause. To change it, set a new alias on usage. + +[source,sql] +---- +WITH b AS (SELECT 1 AS c1) SELECT b.c1, b1.c1 FROM b CROSS JOIN b AS b1; +---- + +== Usage + +Non-materialized `WITH` clauses are useful when you want to refactor a complex query to make it more readable. You can extract subqueries or reuse them in several places, having only one definition. Each use of a query is optimized separately, specifically for how the parent query uses its results. For example: + +[source,sql] +---- +WITH math_grades AS (SELECT g_date, semester_id, grade FROM grades WHERE subject='Math') +SELECT * FROM +(SELECT AVG(grade) FROM math_grades WHERE semester_id=2137) AS avg_semester_grades, +(SELECT AVG(grade) FROM math_grades WHERE g_date >= (CURRENT_TIMESTAMP() - INTERVAL '1 y')) AS avg_year_grades +---- + +Both subqueries use the same auxiliary `math_grades` query, but each filters it using different keys. As a result, both scans only read part of the table. With a materialized CTE (not yet supported), the query engine would scan the whole table first and then filter the result twice, once for each subquery. + +== Alias context + +You cannot create more than one CTE with the same alias within a single `WITH` clause. However, nested `SELECT` statements can each have their own `WITH` clauses, creating their own contexts for defined aliases.
+ +[NOTE] +==== +The same alias can be defined in more than one context. +==== + +[source,sql] +---- +WITH a AS ( -- creates context 1 + SELECT 1 +) +SELECT * FROM ( + WITH a AS (SELECT 2) -- creates context 2 + SELECT * FROM a -- uses context 2 +) CROSS JOIN a; -- uses context 1 +---- + +The query returns `2, 1` as output. + +When referencing an alias, the context defined at the nested query level is used. If the nested context does not define the referenced alias, the search moves up one level and repeats until an alias definition is found. + +[source,sql] +---- +WITH a AS ( + SELECT 1 +) +SELECT * FROM ( + WITH b as (SELECT 2) + SELECT * FROM b +) CROSS JOIN b; -- error +---- + +The query returns `ERROR: relation "b" does not exist`, because `b` is not defined in this context or any of the contexts above. diff --git a/modules/reference/pages/sql/sql-data-types/array.adoc b/modules/reference/pages/sql/sql-data-types/array.adoc new file mode 100644 index 000000000..a51bbf3b7 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/array.adoc @@ -0,0 +1,184 @@ += Array +:description: In Redpanda SQL, an array stores a collection of elements that have the same data type (any built-in data type can be used). +:page-topic-type: reference + +In Redpanda SQL, an array stores a collection of elements that have the same data type (any built-in data type can be used). + +[NOTE] +==== +Currently, the implementation is limited only to single-dimensional arrays. +==== + +== Array type declaration + +An array type can be declared by appending square brackets to the data type of its elements: + +[source,sql] +---- +CREATE TABLE movie_night ( + event_date DATE NOT NULL, + movies_planned TEXT[5] NOT NULL +); +---- + +This syntax specifies the size of the array. However, it does not enforce any limits, and the behavior is the same as for arrays of unspecified length. 
There is also another way to declare an array, by appending the `ARRAY` keyword to the data type of the elements: + +[source,sql] +---- +CREATE TABLE movie_night ( + event_date DATE NOT NULL, + movies_planned TEXT ARRAY NOT NULL +); +---- + +== Array values + +You can create array literals by using the `ARRAY` keyword and combining it with the array's values enclosed in square brackets and separated by commas: + +[source,sql] +---- +ARRAY[ value1 , value2 , ... ] +---- + +You can use such a literal with, for example, `SELECT` or `INSERT INTO` statements: + +[source,sql] +---- +SELECT ARRAY['10:14:25'::time, '22:58:11'::time]; + ?column? +--------------------- + {10:14:25,22:58:11} +(1 row) + +INSERT INTO movie_night VALUES +('2024-12-01', ARRAY['Inception', 'Interstellar', 'The Prestige']); +INSERT 0 1 + +SELECT * FROM movie_night; + event_date | movies_planned +------------+----------------------------------------- + 2024-12-01 | {Inception,Interstellar,"The Prestige"} +(1 row) +---- + +Alternatively, you can write an array value as a string literal. The element values must be enclosed in curly braces and separated by commas: + +[source,sql] +---- +'{ value1 , value2 , ... }' +---- + +You can use such an array value representation in, for example, `INSERT INTO` statements with the `VALUES` clause: + +[source,sql] +---- +INSERT INTO movie_night VALUES ('2024-12-15', '{The Matrix, John Wick}'); +INSERT 0 1 + +SELECT * FROM movie_night; +event_date | movies_planned +------------+----------------------------------------- +2024-12-01 | {Inception,Interstellar,"The Prestige"} +2024-12-15 | {"The Matrix","John Wick"} +(2 rows) +---- + +Any element can be enclosed in double quotes. Double quotes are required if the value contains commas or curly braces: + +[source,sql] +---- +SELECT '{"{\"key1\": 1, \"key2\": \"value\"}", NULL, true}'::json[]; + ?column?
+----------------------------------------------- + {"{\"key1\":1,\"key2\":\"value\"}",NULL,true} +(1 row) +---- + +[NOTE] +==== +In this example, the double quotes that are part of the JSON value must be escaped with a backslash so that they are not mistaken for the double quote that marks the end of the element. +==== + +== Access arrays + +You can retrieve a single element from an array using the array subscript operator. Array indexing is one-based: the elements of an n-length array start at index `1` and end at index `n`: + +[source,sql] +---- +SELECT movies_planned, + movies_planned[1] AS first_movie, + movies_planned[3] AS third_movie +FROM movie_night; + movies_planned | first_movie | third_movie +-----------------------------------------+-------------+-------------- + {Inception,Interstellar,"The Prestige"} | Inception | The Prestige + {"The Matrix","John Wick"} | The Matrix | +(2 rows) +---- + +[NOTE] +==== +If the index exceeds the length of an array, the returned value is `NULL`. +==== + +Arrays can also be accessed by using array slices. An array slice is denoted by writing `lower_bound:upper_bound`. The bounds can be omitted, in which case the slice is unbounded from a given side: + +[source,sql] +---- +SELECT movies_planned[:] as "unbounded slice", + movies_planned[1:2] AS "[1:2] slice", + movies_planned[2:] AS "[2:] slice" +FROM movie_night; + unbounded slice | [1:2] slice | [2:] slice +-----------------------------------------+----------------------------+------------------------------- + {Inception,Interstellar,"The Prestige"} | {Inception,Interstellar} | {Interstellar,"The Prestige"} + {"The Matrix","John Wick"} | {"The Matrix","John Wick"} | {"John Wick"} +(2 rows) +---- + +== Limitations + +=== Field size limit + +In Redpanda SQL, the field size limit for variable-size types is 32MB and this limit applies to arrays as well.
If a value exceeds the given limit, an error is returned: + +[source,sql] +---- +CREATE TABLE tb (array_column bigint[]); +CREATE + +COPY tb FROM '/.oxla/long_array_value.csv'; +ERROR: Error in row 1, column array_column value exceeds size of 33554432 +---- + +=== Unsupported SQL clauses + +Array columns cannot be used as the key columns in `ORDER BY`, `GROUP BY`, or `JOIN` operations. You also cannot use array columns as part of the index of a table. For these operations, Redpanda SQL returns an error message: + +[source,sql] +---- +SELECT * FROM movie_night ORDER BY movies_planned; +ERROR: could not identify an ordering operator for type text[] +---- + +Arrays can still be used in `ORDER BY` or `JOIN` operations if the array column is not the key: + +[source,sql] +---- +SELECT * FROM movie_night ORDER BY event_date ASC; + event_date | movies_planned +------------+----------------------------------------- + 2024-12-01 | {Inception,Interstellar,"The Prestige"} + 2024-12-15 | {"The Matrix","John Wick"} +(2 rows) +---- + +=== Unsupported SQL statements + +Specific SQL statements currently do not support arrays. These include: + +* `INSERT INTO` with `SELECT`: Arrays cannot be directly imported using an `INSERT INTO` with a `SELECT` statement. Instead, use the `COPY FROM CSV` command or the `INSERT INTO` statement with the `VALUES` keyword. +* `UPDATE` and `DELETE`: Updating or deleting records from a table that contains array columns is not supported. +* `COPY TO`: Exporting data from array columns using the `COPY TO` command is not available. +* `CREATE INDEX`: An index cannot be created on an array column. + +Any attempt to use these operations with arrays results in an error. Consider these limitations when designing tables that include array columns.
diff --git a/modules/reference/pages/sql/sql-data-types/bool.adoc b/modules/reference/pages/sql/sql-data-types/bool.adoc new file mode 100644 index 000000000..6be575b41 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/bool.adoc @@ -0,0 +1,149 @@ += Bool +:description: BOOL is a data type for expressions that return one of two possible values: true or false. +:page-topic-type: reference + +== Overview + +A `BOOL` is a data type for expressions that return one of two possible values: `true` or `false`. + +[WARNING] +==== +`BOOLEAN` is an alias for the `BOOL` data type. You can create a table using `BOOLEAN`, but Redpanda SQL stores and processes the values as `BOOL`. +==== + +== Format + +* `FALSE` +* `TRUE` + +== Examples + +The following are examples of using a bool data type: + +=== Create a table + +The following example creates a `borrowBook` table to store book borrowing records. The table contains columns for the borrow ID, book name, borrower, and a `bool` column for the returned status. 
+ +[source,sql] +---- +CREATE TABLE borrowBook ( + borrowID INT, + bookName TEXT, + borrower TEXT, + returnedStat BOOL NOT NULL +); +INSERT INTO borrowBook (borrowID,bookName, borrower, returnedStat) +VALUES + (101, 'The Silent Patient', 'Mike', TRUE), + (201, 'Malibu Rising', 'Jean', TRUE), + (301, 'The Guest List', 'Mark', FALSE), + (401, 'The Four Winds', 'Cliff', TRUE), + (501, 'The Vanishing Half: A Novel', 'Sarah', TRUE), + (601, 'Red, White & Royal Blue', 'Anna', FALSE), + (701, 'The Duke and I', 'Blake', FALSE), + (801, 'The Lord of the Rings', 'Sandra', FALSE); +---- + +The `borrowBook` table has been successfully created after executing the query: + +[source,sql] +---- +COMPLETE +INSERT 0 8 +---- + +=== Display the table + +Run the `SELECT` statement to get all records from the `borrowBook` table: + +[source,sql] +---- +SELECT * FROM borrowBook; +---- + +This returns the following result: + +[source,sql] +---- ++-----------+---------------------------------+------------+---------------+ +| borrowid | bookname | borrower | returnedstat | ++-----------+---------------------------------+------------+---------------+ +| 101 | The Silent Patient | Mike | t | +| 201 | Malibu Rising | Jean | t | +| 301 | The Guest List | Mark | f | +| 401 | The Four Winds | Cliff | t | +| 501 | The Vanishing Half: A Novel | Sarah | t | +| 601 | Red, White & Royal Blue | Anna | f | +| 701 | The Duke and I | Blake | f | +| 801 | The Lord of the Rings | Sandra | f | ++-----------+---------------------------------+------------+---------------+ +---- + +=== List of the returned books + +This example retrieves all the books that have already been returned: + +[source,sql] +---- +SELECT * FROM borrowbook +WHERE returnedstat= 'true'; +---- + +The query returns the following results: + +[source,sql] +---- ++-----------+---------------------------------+------------+---------------+ +| borrowid | bookname | borrower | returnedstat | 
++-----------+---------------------------------+------------+---------------+ +| 101 | The Silent Patient | Mike | t | +| 201 | Malibu Rising | Jean | t | +| 401 | The Four Winds | Cliff | t | +| 501 | The Vanishing Half: A Novel | Sarah | t | ++-----------+---------------------------------+------------+---------------+ +---- + +=== List of the unreturned books + +To acquire all of the book records that haven't been returned yet, run the `SELECT` statement with a specified `WHERE` condition as `false`: + +[source,sql] +---- +SELECT * FROM borrowbook +WHERE returnedstat= 'false'; +---- + +The query returns the following results: + +[source,sql] +---- ++-----------+---------------------------------+------------+---------------+ +| borrowid | bookname | borrower | returnedstat | ++-----------+---------------------------------+------------+---------------+ +| 301 | The Guest List | Mark | f | +| 601 | Red, White & Royal Blue | Anna | f | +| 701 | The Duke and I | Blake | f | +| 801 | The Lord of the Rings | Sandra | f | ++-----------+---------------------------------+------------+---------------+ +---- + +=== Check a book's return status + +This example finds the returned status of the book "The Lord of the Rings" by executing the `SELECT` statement with a specified column in the `WHERE` clause: + +[source,sql] +---- +SELECT * FROM borrowbook +WHERE bookname = 'The Lord of the Rings'; +---- + +This query filters all records based on the specified conditions, showing that Sandra hasn't returned the book yet: + +[source,sql] +---- ++-----------+---------------------------------+------------+---------------+ +| borrowid | bookname | borrower | returnedstat | ++-----------+---------------------------------+------------+---------------+ +| 801 | The Lord of the Rings | Sandra | f | ++-----------+---------------------------------+------------+---------------+ +---- diff --git a/modules/reference/pages/sql/sql-data-types/date.adoc 
b/modules/reference/pages/sql/sql-data-types/date.adoc new file mode 100644 index 000000000..f8e2d18dd --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/date.adoc @@ -0,0 +1,69 @@ += Date +:description: The DATE data type stores calendar dates without a time zone in Redpanda SQL. +:page-topic-type: reference + +The `DATE` data type stores calendar dates without a time zone. Use it to store and insert date values. + +[NOTE] +==== +The date value is stored without the time zone. +==== + +== Format + +[source,sql] +---- +YYYY-MM-DD +---- + +* `YYYY` - Four-digit year +* `MM` - One / two-digit month +* `DD` - One / two-digit day + +== Examples + +In this example, the `emp_submission` table consists of the candidate ID, candidate name, the submitted department, and a submission date with a `DATE` data type. + +[source,sql] +---- +CREATE TABLE emp_submission ( + candidate_ID INT, + candidate_Name TEXT, + sub_dept TEXT, + sub_date DATE +); + +INSERT INTO emp_submission (candidate_ID, candidate_Name, sub_dept, sub_date) +VALUES +(8557411, 'Kumar', 'HR', '2022-05-01'), +(8557421, 'Ricky', 'HR', '2022-01-09'), +(8557451, 'Alice', 'Finance', '2022-08-02'), +(8557461, 'Angel', 'Product', '2012-04-16'), +(8557431, 'Joan', 'Finance', '2022-02-02'), +(8557471, 'Cody', 'Product', '2022-03-20'), +(8557491, 'Liam', 'Product', '2022-06-15'); +---- + +Now that the data has been inserted, execute the following `SELECT` statement: + +[source,sql] +---- +SELECT * FROM emp_submission; +---- + +The following is the result of the `SELECT` statement where the values in the `sub_date` column have `DATE` data type: + +[source,sql] +---- ++---------------+------------------+------------+---------------+ +| candidate_id | candidate_name | sub_dept | sub_date | ++---------------+------------------+------------+---------------+ +| 8557411 | Kumar | HR | 2022-05-01 | +| 8557421 | Ricky | HR | 2022-01-09 | +| 8557451 | Alice | Finance | 2022-08-02 | +| 8557461 | Angel | Product | 
2012-04-16 | +| 8557431 | Joan | Finance | 2022-02-02 | +| 8557471 | Cody | Product | 2022-03-20 | +| 8557491 | Liam | Product | 2022-06-15 | ++---------------+------------------+------------+---------------+ +---- diff --git a/modules/reference/pages/sql/sql-data-types/geography.adoc b/modules/reference/pages/sql/sql-data-types/geography.adoc new file mode 100644 index 000000000..17dfc3c3f --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/geography.adoc @@ -0,0 +1,75 @@ += Geography +:description: The GEOGRAPHY data type stores geodetic (spherical) spatial point values using the WGS84 coordinate system (SRID 4326). +:page-topic-type: reference + +The `GEOGRAPHY` data type stores geodetic (spherical) spatial point values using the WGS84 coordinate system (SRID 4326). Unlike xref:reference:sql/sql-data-types/geometry.adoc[GEOMETRY], which uses planar coordinates, `GEOGRAPHY` interprets coordinates as longitude and latitude on the Earth's surface, and distance calculations return results in meters. + +[NOTE] +==== +Redpanda SQL supports only `POINT` geographies. Multi-part geometries such as `POLYGON`, `LINESTRING`, and `MULTIPOINT` are not supported. +==== + +== Format + +`GEOGRAPHY` values can be specified in the following formats: + +* WKT: `POINT(longitude latitude)` (SRID defaults to 4326) +* EWKT: `SRID=4326;POINT(longitude latitude)` +* EWKB: A hex-encoded binary string (50 hex characters, includes SRID) + +[source,sql] +---- +SELECT GEOGRAPHY 'POINT(-73.9857 40.7484)'; +---- + +== SRID handling + +`GEOGRAPHY` always uses SRID 4326 (WGS84). If you specify a different SRID, an error is returned. + +== Casting + +`GEOGRAPHY` supports the following casts: + +* `GEOGRAPHY` -> `TEXT`: Returns EWKB hex string +* `TEXT` -> `GEOGRAPHY`: Parses WKT or EWKB string +* `GEOGRAPHY` -> `GEOMETRY`: Removes SRID +* `GEOMETRY` -> `GEOGRAPHY`: Adds SRID=4326 + +[NOTE] +==== +Casting between `GEOGRAPHY` and `POINT` is not supported. 
+==== + +== Functions + +The following functions work with `GEOGRAPHY` values: + +[cols="2,3,1",options="header"] +|=== +|Function |Description |Return type +|`ST_ASTEXT(geography)` |Returns the WKT representation |`text` +|`ST_ASTEXT(geography, max_digits)` |Returns the WKT representation with limited decimal digits |`text` +|`ST_ASEWKT(geography)` |Returns the Extended WKT representation (includes SRID) |`text` +|`ST_ASEWKT(geography, max_digits)` |Returns the Extended WKT representation with limited decimal digits |`text` +|`ST_DISTANCE(geography, geography)` |Returns the geodetic distance in meters using the WGS84 ellipsoid |`double precision` +|`ST_DISTANCE(geography, geography, use_spheroid)` |Returns the geodetic distance in meters. Set `use_spheroid` to `false` for a faster spherical approximation. |`double precision` +|=== + +== Examples + +Calculate the distance in meters between two geographic points (New York City and London): + +[source,sql] +---- +SELECT ST_DISTANCE( + GEOGRAPHY 'POINT(-73.9857 40.7484)', + GEOGRAPHY 'POINT(-0.1278 51.5074)' +) AS distance_meters; +---- + +[source,sql] +---- + distance_meters +------------------- + 5570222.179854498 +---- diff --git a/modules/reference/pages/sql/sql-data-types/geometry.adoc b/modules/reference/pages/sql/sql-data-types/geometry.adoc new file mode 100644 index 000000000..85e023708 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/geometry.adoc @@ -0,0 +1,77 @@ += Geometry +:description: The GEOMETRY data type stores planar (Cartesian) spatial point values as two double-precision coordinates. +:page-topic-type: reference + +The `GEOMETRY` data type stores planar (Cartesian) spatial point values as two double-precision coordinates. It uses https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry[Well-Known Text (WKT)^] and https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary[Well-Known Binary (WKB)^] formats for input and output. 
+ +[NOTE] +==== +Redpanda SQL supports only `POINT` geometries. Multi-part geometries such as `POLYGON`, `LINESTRING`, and `MULTIPOINT` are not supported. +==== + +== Format + +`GEOMETRY` values can be specified in the following formats: + +* WKT: `POINT(x y)` (space-separated coordinates) +* EWKT: `SRID=4326;POINT(x y)` (SRID is accepted but ignored for `GEOMETRY`) +* EWKB: A hex-encoded binary string (42 hex characters) + +[source,sql] +---- +SELECT GEOMETRY 'POINT(0.1234 5.6789)'; +---- + +== Differences between GEOMETRY and GEOGRAPHY + +[cols="1,1",options="header"] +|=== +|GEOMETRY |GEOGRAPHY +|Uses a Cartesian (planar) coordinate system |Uses a geodetic (spherical) coordinate system +|SRID is ignored |SRID is always 4326 (WGS84) +|`ST_DISTANCE` returns Euclidean distance |`ST_DISTANCE` returns distance in meters +|=== + +== Casting + +`GEOMETRY` supports the following casts: + +* `GEOMETRY` -> `TEXT`: Returns WKB hex string +* `TEXT` -> `GEOMETRY`: Parses WKT or EWKB string +* `GEOMETRY` -> `GEOGRAPHY`: Adds SRID=4326 +* `GEOGRAPHY` -> `GEOMETRY`: Removes SRID +* `GEOMETRY` -> `POINT`: Converts to `(x,y)` format +* `POINT` -> `GEOMETRY`: Converts to WKB format + +== Functions + +The following functions work with `GEOMETRY` values: + +[cols="1,2,1",options="header"] +|=== +|Function |Description |Return type +|`ST_ASTEXT(geometry)` |Returns the WKT representation of the geometry |`text` +|`ST_ASTEXT(geometry, max_digits)` |Returns the WKT representation with limited decimal digits |`text` +|`ST_ASEWKT(geometry)` |Returns the Extended WKT representation |`text` +|`ST_ASEWKT(geometry, max_digits)` |Returns the Extended WKT representation with limited decimal digits |`text` +|`ST_DISTANCE(geometry, geometry)` |Returns the Euclidean distance between two geometry points |`double precision` +|=== + +== Examples + +[source,sql] +---- +SELECT + ST_ASTEXT(GEOMETRY 'POINT(1.5 2.5)') AS wkt, + ST_DISTANCE( + GEOMETRY 'POINT(0 0)', + GEOMETRY 'POINT(3 4)' + ) AS distance; 
+---- + +[source,sql] +---- + wkt | distance +----------------+---------- + POINT(1.5 2.5) | 5 +---- diff --git a/modules/reference/pages/sql/sql-data-types/index.adoc b/modules/reference/pages/sql/sql-data-types/index.adoc new file mode 100644 index 000000000..9d21e1fa6 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/index.adoc @@ -0,0 +1,38 @@ += SQL data types +:description: Redpanda SQL supports a wide range of data types, each designed to handle specific types of data efficiently. +:page-topic-type: reference + +Redpanda SQL supports a wide range of data types, each designed to handle specific types of data efficiently. + +The following table summarizes the data types supported by Redpanda SQL: + +[width="100%",cols="<48%,<29%,<23%",options="header",] +|=== +|Data Type |Definition |Format +|xref:reference:sql/sql-data-types/numeric-type/numeric.adoc#int-type[INT] |32-bit signed integer |one or more digits "`0`" to "`9`" +|xref:reference:sql/sql-data-types/numeric-type/numeric.adoc#bigint-type[BIGINT] |64-bit signed integer |large numeric/decimal value +|xref:reference:sql/sql-data-types/numeric-type/numeric.adoc#real-type[REAL] |32-bit floating point number |`float(n)` +|xref:reference:sql/sql-data-types/numeric-type/numeric.adoc#double-precision-type[DOUBLE PRECISION] |64-bit floating point number |`decimal(p, s)` +|xref:reference:sql/sql-data-types/timestamp-without-time-zone.adoc[TIMESTAMP WITHOUT TIME ZONE] |Time and date values without a time zone |`YYYY-MM-DD [HH:MM:SS[.SSSSSS]]` +|xref:reference:sql/sql-data-types/timestamp-with-time-zone.adoc[TIMESTAMP WITH TIME ZONE] |Date and time values, including the time zone information |`YYYY-MM-DD HH:MM:SS.SSSSSS{plus}TZ` +|xref:reference:sql/sql-data-types/date.adoc[DATE] |Date value |`YYYY-MM-DD` +|xref:reference:sql/sql-data-types/time-type/time.adoc[TIME] |Time values without any date information |`HH:MM:SS[.SSSSSS]` +|xref:reference:sql/sql-data-types/interval.adoc[INTERVAL] |Encodes a span 
of time |`year-month (YYYY-MM); day-time (DD HH:MM:SS)` +|xref:reference:sql/sql-data-types/bool.adoc[BOOL] |Boolean value |`True` or `False` +|xref:reference:sql/sql-data-types/text.adoc[TEXT] |UTF8 encoded string with Unicode support |'`text`' +|xref:reference:sql/sql-data-types/json.adoc[JSON] |A value in JSON standard format |`variable_name JSON` +|xref:reference:sql/sql-data-types/array.adoc[ARRAY] |An array of a specific data type |`'{value1, value2, value3}'::data_type[]` +|xref:reference:sql/sql-data-types/row.adoc[ROW] |A composite value containing fields of different types |`ROW(value1, value2, ...)` +|xref:reference:sql/sql-data-types/geometry.adoc[GEOMETRY] |A spatial data type for planar (Cartesian) point values |`GEOMETRY 'POINT(x y)'` +|xref:reference:sql/sql-data-types/geography.adoc[GEOGRAPHY] |A spatial data type for geodetic (spherical) point values using WGS84 |`GEOGRAPHY 'POINT(lon lat)'` +|=== + +[WARNING] +==== +When performing operations on numeric or temporal types, overflows can lead to undefined behavior, resulting in unexpected values or errors. Ensure input values are within the allowed range for each numeric type to prevent overflows. This can occur during arithmetic operations or function execution (for example, `AVG()`), where the result does not fit the result type. Using larger data types such as `BIGINT` can help mitigate overflow risks. +==== + +[NOTE] +==== +Explicit casting between types can cause data loss due to altered precision or magnitude, such as truncating fractional seconds in `TIME` or silently clipping out-of-range values. Verify input ranges to prevent unintended data loss. 
+==== diff --git a/modules/reference/pages/sql/sql-data-types/interval.adoc b/modules/reference/pages/sql/sql-data-types/interval.adoc new file mode 100644 index 000000000..d8ed544c0 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/interval.adoc @@ -0,0 +1,177 @@ += Interval +:description: The Interval data type represents periods between dates or times, which can be precisely calculated and expressed through various units. +:page-topic-type: reference + +The Interval data type represents periods between dates or times, which can be precisely calculated and expressed through various units. These units can be combined, and additional options control how the interval is parsed. + +== Syntax + +The syntax for specifying an interval is as follows: + +[source,sql] +---- +SELECT INTERVAL 'quantity unit [quantity unit...] [direction]' [OPTION] +---- + +[cols="1,3",options="header"] +|=== +|Parameter |Description + +|`quantity` +|The value representing the number of units. + +|`unit` +a|Year, month, day, hour, and minute. Abbreviations, short forms, and dash format are supported. Plural forms are also acceptable (for example, months, days, weeks). + +|`direction` +|An optional parameter: `ago` or empty string. + +|`OPTION` +|Additional options when parsing interval. +|=== + +[NOTE] +==== +For arithmetic and comparison operations, Redpanda SQL assumes `1 month = 30 days` and `1 day = 24 hours`. However, adding 30 days to a timestamp is not always equivalent to adding 1 month, because calendar months have different durations.
+==== + +== Supported units and abbreviations + +[cols=",",options="header",] +|=== +|*Unit* |*Abbreviations* +|Millennium |- +|Century |- +|Decade |- +|Year |`y`, `yr`, `yrs` +|Month |- +|Week |- +|Day |`d` +|Hour |`h`, `hr`, `hrs` +|Minute |`min`, `mins`, `m` +|Second |`s`, `sec`, `secs` +|Millisecond |`ms` +|Microsecond |- +|=== + +== Options for interval parsing + +* `YEAR`, `MONTH`, `DAY`, `HOUR`, `MINUTE`, `SECOND` +* `YEAR TO MONTH`, `DAY TO HOUR`, `DAY TO MINUTE`, `DAY TO SECOND`, `HOUR TO MINUTE`, `HOUR TO SECOND`, `MINUTE TO SECOND` + +== Examples + +=== Select interval with multiple units + +This example calculates an interval by combining multiple units of time. + +[source,sql] +---- +SELECT INTERVAL '5 years 4 months 2 weeks 3 days 5 hours 10 minutes 25 seconds' as "Interval"; +---- + +[source,sql] +---- + Interval +--------------------------------- + 5 years 4 mons 17 days 05:10:25 +(1 row) +---- + +=== Use abbreviations + +This example shows how to use abbreviated units for time intervals. + +[source,sql] +---- +SELECT INTERVAL '10 yr 8 months 2 weeks 6 days 5 hrs 10 min 20 s' as "Interval"; +---- + +[source,sql] +---- + Interval +---------------------------------- + 10 years 8 mons 20 days 05:10:20 +(1 row) +---- + +=== Use dash format +[source,sql] +---- +SELECT INTERVAL '1-2 3 DAYS 04:05:06.070809' as "Interval"; +---- + +[source,sql] +---- + Interval +-------------------------------------- + 1 year 2 mons 3 days 04:05:06.070809 +(1 row) +---- + +=== Parse intervals using specific units + +The following query keeps everything up to minutes and discards seconds and fractional seconds.
+ +[source,sql] +---- +SELECT INTERVAL '1-2 5 DAYS 07:08:06.040809' MINUTE as "Interval"; +---- + +[source,sql] +---- + Interval +------------------------------- + 1 year 2 mons 5 days 07:08:00 +(1 row) +---- + +=== Display specific range only + +Executing the following query results in only years and months being displayed, excluding days, hours, minutes, and seconds from the input. + +[source,sql] +---- +SELECT INTERVAL '2-4 5 DAYS 04:05:06.070809' YEAR TO MONTH as "Interval"; +---- + +[source,sql] +---- + Interval +---------------- + 2 years 4 mons +(1 row) +---- + +=== Extract data from interval + +To extract a specific field from an interval, use the `EXTRACT()` function: + +[source,sql] +---- +SELECT EXTRACT (field FROM interval) +---- + +* `field`: Supports time units, such as `YEAR`, `MONTH`, `DAY`, and `HOUR`. +* `interval`: The interval to extract the field from. + +[source,sql] +---- +SELECT EXTRACT (MINUTE +FROM INTERVAL '2 hours 30 minutes'); +---- + +The output returns only the minutes part: + +[source,sql] +---- + extract +------------ + 30 +(1 row) +---- + +[NOTE] +==== +If you query a field that is not specified in the interval, the output is `0`. +==== diff --git a/modules/reference/pages/sql/sql-data-types/json.adoc b/modules/reference/pages/sql/sql-data-types/json.adoc new file mode 100644 index 000000000..0e0bf966f --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/json.adoc @@ -0,0 +1,80 @@ += JSON +:description: JSON stands for JavaScript Object Notation. +:page-topic-type: reference + +== Overview + +JSON stands for JavaScript Object Notation. It is an open standard format with key-value pairs to transport data between a server and a web application.
+ +== Syntax + +The JSON data type in Redpanda SQL has the following syntax: + +[source,sql] +---- +variable_name JSON +---- + +== Examples + +=== Create a table + +First, create the `orders` table using the following command: + +[source,sql] +---- +CREATE TABLE orders ( + orders_Detail JSON +); +---- + +This creates a table with the `orders_Detail` column to store key-value pairs of data. + +=== Insert data + +Next, insert data into the orders table as follows: + +[source,sql] +---- +INSERT INTO orders (orders_Detail) +VALUES +('{ "customer": "Dean Smith", "items": {"product": "cup","qty": 2}}'), +('{ "customer": "Sissy Kate", "items": {"product": "knife","qty": 1}}'), +('{ "customer": "Emma Stone", "items": {"product": "spoon","qty": 4}}'), +('{ "customer": "Chris Bale", "items": {"product": "fork","qty": 5}}'), +('{ "customer": "Mike Stuart", "items": {"product": "spatula","qty": 2}}'); +---- + +This inserts data values where `orders_Detail` has the following keys: + +* `customer`: Stores the customer's data. +* `items`: Stores the order details, including `product` and `qty`. + +=== Retrieve data + +Use the `SELECT` command to retrieve the orders table's data. 
+ +[source,sql] +---- +SELECT * FROM orders; +---- + +The query returns the following output: + +[source,sql] +---- ++--------------------------------------------------------------------------+ +| orders_detail | ++--------------------------------------------------------------------------+ +| {"customer":"Dean Smith","items":{"qty":2.000000,"product":"cup"}} | +| {"customer":"Sissy Kate","items":{"product":"knife","qty":1.000000}} | +| {"customer":"Emma Stone","items":{"qty":4.000000,"product":"spoon"}} | +| {"customer":"Chris Bale","items":{"product":"fork","qty":5.000000}} | +| {"customer":"Mike Stuart","items":{"qty":2.000000,"product":"spatula"}} | ++--------------------------------------------------------------------------+ +---- + +[TIP] +==== +It is normal for the JSON type's result to look disordered. +==== diff --git a/modules/reference/pages/sql/sql-data-types/numeric-type/index.adoc b/modules/reference/pages/sql/sql-data-types/numeric-type/index.adoc new file mode 100644 index 000000000..1951c000e --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/numeric-type/index.adoc @@ -0,0 +1,3 @@ += Numeric Types +:description: Reference for numeric data types in Redpanda SQL. +:page-layout: index diff --git a/modules/reference/pages/sql/sql-data-types/numeric-type/numeric-data-type-aliases.adoc b/modules/reference/pages/sql/sql-data-types/numeric-type/numeric-data-type-aliases.adoc new file mode 100644 index 000000000..4085015d2 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/numeric-type/numeric-data-type-aliases.adoc @@ -0,0 +1,91 @@ += Numeric Data Type - Aliases +:description: Aliases for the primary numeric data types in Redpanda SQL, mapped to their primary types during processing. +:page-topic-type: reference + +Redpanda SQL accepts aliases that you can use interchangeably with the primary data types. During processing, Redpanda SQL maps these aliases to their corresponding primary data types. 
+ +The following sections describe the numeric data type aliases: + +== INTEGER alias + +The `INTEGER` alias is an alternative name for the `INT` data type. For example, the following two queries are functionally the same: + +[source,sql] +---- +CREATE TABLE ExampleTable ( + id INTEGER +); + +-- Functionally the same as the previous table +CREATE TABLE AnotherTable ( + id INT +); +---- + +[NOTE] +==== +Even though you write `INTEGER`, Redpanda SQL stores and treats the data as `INT`. +==== + +== LONG alias + +The `LONG` alias is an alternative name for the `BIGINT` data type, which is used to represent larger integer values. For example: + +[source,sql] +---- +CREATE TABLE LargeValues ( + value LONG +); + +-- Functionally the same as the previous table +CREATE TABLE LargeValuesEquivalent ( + value BIGINT +); +---- + +[NOTE] +==== +Any usage of `LONG` is stored and treated as `BIGINT`. +==== + +== FLOAT alias + +The `FLOAT` alias corresponds to the `REAL` data type. For example: + +[source,sql] +---- +CREATE TABLE FloatExample ( + price FLOAT +); + +-- Functionally the same as the previous table +CREATE TABLE FloatEquivalent ( + price REAL +); +---- + +[NOTE] +==== +When you use `FLOAT`, it's stored and treated as `REAL`. +==== + +== DOUBLE alias + +The `DOUBLE` alias defines `DOUBLE PRECISION` floating-point numbers. For example: + +[source,sql] +---- +CREATE TABLE DoubleExample ( + measurement DOUBLE +); + +-- Functionally the same as the previous table +CREATE TABLE DoubleEquivalent ( + measurement DOUBLE PRECISION +); +---- + +[NOTE] +==== +When you use `DOUBLE`, it's stored and treated as `DOUBLE PRECISION`.
+==== diff --git a/modules/reference/pages/sql/sql-data-types/numeric-type/numeric.adoc b/modules/reference/pages/sql/sql-data-types/numeric-type/numeric.adoc new file mode 100644 index 000000000..943e19e66 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/numeric-type/numeric.adoc @@ -0,0 +1,304 @@ += Numeric +:description: The INT data type represents whole numbers without decimal points. +:page-topic-type: reference + +== Int type + +The `INT` data type represents whole numbers without decimal points. It is a 32-bit signed integer with a range from -2147483648 to 2147483647. + +=== Format + +[source,sql] +---- +column_name INT +---- + +=== Example + +The following is an example of how to create a column using an `INT` type. + +[source,sql] +---- +CREATE TABLE cities ( + city_id INT, + cityname TEXT, + population INT +); +INSERT INTO cities (city_id, cityname, population) +VALUES +(8557411, 'New York', 8419000), +(8557421, 'London', 8982000), +(8557451, 'Hongkong', 7482000), +(8557491, 'Seoul', 9776000); +---- + +To display the table, run the query: + +[source,sql] +---- +SELECT * FROM cities; +---- + +This returns the following result. + +[source,sql] +---- + city_id | cityname | population +---------+----------+------------ + 8557411 | New York | 8419000 + 8557421 | London | 8982000 + 8557451 | Hongkong | 7482000 + 8557491 | Seoul | 9776000 +(4 rows) +---- + +== Bigint type + +The `BIGINT` data type stores large whole numbers that exceed the `INT` range. It is a 64-bit signed integer with a range from -9223372036854775808 to 9223372036854775807. 
+ +=== Format + +[source,sql] +---- +column_name BIGINT +---- + +=== Example + +The following is an example of how to create a column using the `BIGINT` type: + +[source,sql] +---- +CREATE TABLE galaxies ( + galaxy_name TEXT, + star BIGINT +); +INSERT INTO galaxies (galaxy_name, star) +VALUES +('Milky Way', 100000000000), +('Cigar', 30000000000), +('Andromeda', 1000000000000), +('Cosmos', 2000000000000000000); +---- + +To display the table, run the query: + +[source,sql] +---- +SELECT * FROM galaxies; +---- + +The query returns the following output: + +[source,sql] +---- + galaxy_name | star +-------------+--------------------- + Milky Way | 100000000000 + Cigar | 30000000000 + Andromeda | 1000000000000 + Cosmos | 2000000000000000000 +(4 rows) +---- + +== Real type + +The `REAL` data type is a 32-bit floating-point number compliant with the IEEE 754 binary32 format. + +=== Format + +[source,sql] +---- +column_name REAL +---- + +=== Example + +==== Create a table + +The following example creates a table with a `REAL` column type. + +[source,sql] +---- +CREATE TABLE numbers ( + column_1 REAL +); + +INSERT into numbers (column_1) +VALUES (1.234568); +---- + +Display the table with the following query. + +[source,sql] +---- +SELECT * FROM numbers; +---- + +The stored value is shown in the following output. + +[source,sql] +---- + column_1 +---------- + 1.234568 +(1 row) +---- + +==== Rounding + +Rounding might happen if the precision of an input number is too high. + +[source,sql] +---- +CREATE TABLE numbers1 ( +column_1 REAL +); + +INSERT into numbers1 (column_1) +VALUES (1.2345689); +---- + +Display the table with the following query. + +[source,sql] +---- +SELECT * FROM numbers1; +---- + +The following output shows the value after rounding. + +[source,sql] +---- + column_1 +---------- + 1.234569 +(1 row) +---- + +==== Create a table with numbers exceeding the range + +The `REAL` type only stores 32-bit floating-point numbers. 
In this example, the input numbers exceed the range. + +[source,sql] +---- +CREATE TABLE numbers2 ( + column_1 REAL +); + +INSERT into numbers2 (column_1) +VALUES (1.2345682991822); +---- + +Display the table with the following query. + +[source,sql] +---- +SELECT * FROM numbers2; +---- + +The final output will only return numbers that match the range. + +[source,sql] +---- + column_1 +----------- + 1.2345684 +(1 row) +---- + +== Double precision type + +The `DOUBLE PRECISION` data type is a 64-bit floating-point number compliant with the IEEE 754 binary64 format. + +=== Format + +[source,sql] +---- +column_name DOUBLE PRECISION +---- + +=== Example + +==== Create a table + +The following example creates a table with a `DOUBLE PRECISION` type column. + +[source,sql] +---- +CREATE TABLE numbersdouble ( + column_1 DOUBLE PRECISION +); + +INSERT into numbersdouble (column_1) +VALUES (1.234568817283122); +---- + +Display the table with the following query. + +[source,sql] +---- +SELECT * FROM numbersdouble; +---- + +The following is the output. + +[source,sql] +---- + column_1 +------------------- + 1.234568817283122 +(1 row) +---- + +==== Rounding + +Rounding might happen if the precision of an input number is too high. + +[source,sql] +---- +CREATE TABLE numbersdouble1 ( + column_1 DOUBLE PRECISION +); + +INSERT into numbersdouble1 (column_1) +VALUES (1.234568817283122773); +---- + +Display the table with the following query. + +[source,sql] +---- +SELECT * FROM numbersdouble1; +---- + +The following output shows the value after rounding. + +[source,sql] +---- + column_1 +-------------------- + 1.2345688172831228 +(1 row) +---- + +== Scientific notation support + +Redpanda SQL supports scientific notation for floating-point types. This feature supports expressions like 1.1e{plus}3, 1e-20, 1.1e02, and similar in your queries. + +=== Example + +[source,sql] +---- +SELECT 1.1e+3, 1e-20, 1.1e02; +---- + +The query returns: + +[source,sql] +---- + ?column? | ?column? 
| ?column? +----------+----------+---------- + 1100 | 1e-20 | 110 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-data-types/row.adoc b/modules/reference/pages/sql/sql-data-types/row.adoc new file mode 100644 index 000000000..bc55a2dac --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/row.adoc @@ -0,0 +1,77 @@ += ROW +:description: The ROW data type represents a composite value containing one or more fields of different types. +:page-topic-type: reference + +The `ROW` data type represents a composite value (also known as a struct or record) containing one or more fields of different types. + +== Syntax + +[source,sql] +---- +ROW(expression [, ...]) +(expression, expression [, ...]) +---- + +The explicit `ROW` keyword is required for single-element composites. For two or more elements, the `ROW` keyword is optional and the parenthesized list is treated as an implicit tuple. + +== Examples + +=== Create a ROW with multiple values + +[source,sql] +---- +SELECT ROW(1, 'hello', 3.14); +---- + +[source,sql] +---- + row +----------------- + (1,"hello",3.14) +(1 row) +---- + +=== Use implicit tuple syntax + +[source,sql] +---- +SELECT (1, 2, 3); +---- + +[source,sql] +---- + row +--------- + (1,2,3) +(1 row) +---- + +=== Create a nested ROW + +[source,sql] +---- +SELECT ROW(1, ROW(2, 3)); +---- + +[source,sql] +---- + row +----------- + (1,"(2,3)") +(1 row) +---- + +=== Create an empty ROW + +[source,sql] +---- +SELECT ROW(); +---- + +[source,sql] +---- + row +----- + () +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-data-types/text.adoc b/modules/reference/pages/sql/sql-data-types/text.adoc new file mode 100644 index 000000000..1e939f028 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/text.adoc @@ -0,0 +1,126 @@ += Text +:description: The text data type is a UTF8-encoded text with Unicode support, which stores a sequence of characters (text). 
+:page-topic-type: reference + +The text data type is a UTF8-encoded text with Unicode support, which stores a sequence of characters (text). + +== Examples + +Create an employee table with a text data type in each column: + +[source,sql] +---- +CREATE TABLE employee ( + employeeName text, + employeeDept text, + employeeRole text +); +INSERT INTO employee (employeeName, employeeDept, employeeRole) +VALUES ('John','Finance','Staff'), + ('Maya','Product','Staff'), + ('Jane','Finance','Staff'), + ('Phil','HR','Manager'); +---- + +[TIP] +==== +Insert the text value between the single quotes `' '`. +==== +The following output shows the created table: + +[source,sql] +---- ++---------------+---------------+---------------+ +| employeename | employeedept | employeerole | ++---------------+---------------+---------------+ +| John | Finance | Staff | +| Maya | Product | Staff | +| Jane | Finance | Staff | +| Phil | HR | Manager | ++---------------+---------------+---------------+ +---- + +== Text with SUBSTR function + +The `substr()` function extracts a specific number of characters from a text. + +=== Syntax + +[source,sql] +---- +substr( text, start_position, length ) +---- + +The syntax includes the following parameters: + +* `text` is the specified text. +* `start_position` is the starting position, specifying the part from which the substring is returned. Use an integer value. +* `length` determines the number of characters to extract. Use an integer value. + +[NOTE] +==== +The first position in the `text` is 1. +==== + +=== Example + +The following query extracts five characters from `Watermelon`, starting at position 6: + +[source,sql] +---- +SELECT substr('Watermelon',6,5) AS "Fruit"; +---- + +The following output shows the result: + +[source,sql] +---- ++-------------+ +| Fruit | ++-------------+ +| melon | ++-------------+ +---- + +== Text with LENGTH function + +The `length()` function returns the number of characters in a text.
+ +[NOTE] +==== +The number of characters might be different from the byte length. +==== + +=== Syntax + +The length function will take a text as a parameter. + +[source,sql] +---- +LENGTH (text); +---- + +=== Example + +Insert a value into the text column. + +[source,sql] +---- +SELECT LENGTH ('UNITED STATES'); +---- + +The following output shows the result. + +[source,sql] +---- ++---------+ +| f | ++---------+ +| 13 | ++---------+ +---- + +[NOTE] +==== +The `length()` function will also count spaces. +==== diff --git a/modules/reference/pages/sql/sql-data-types/time-type/index.adoc b/modules/reference/pages/sql/sql-data-types/time-type/index.adoc new file mode 100644 index 000000000..a96a32040 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/time-type/index.adoc @@ -0,0 +1,3 @@ += Time Types +:description: Reference for time data types and operators in Redpanda SQL. +:page-layout: index diff --git a/modules/reference/pages/sql/sql-data-types/time-type/time-operators.adoc b/modules/reference/pages/sql/sql-data-types/time-type/time-operators.adoc new file mode 100644 index 000000000..5bca19a53 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/time-type/time-operators.adoc @@ -0,0 +1,379 @@ += Time operators +:description: Time operators in Redpanda SQL perform various operations on dates, times, and intervals. +:page-topic-type: reference + +Time operators in Redpanda SQL perform various operations on dates, times, and intervals. + +== DATE {plus} INTEGER + +Add a specific number of days to a date. + +Example: + +[source,sql] +---- +select date '2022-03-15' + 14 as "result"; +---- + +The result is the date 14 days after '`2022-03-15`': + +[source,sql] +---- + result +------------ + 2022-03-29 +---- + +=== INTEGER {plus} DATE + +Adding and multiplying time operators can also be done in reverse order. For example, you can add a number of days to a date in the format of `Integer {plus} Date`. 
+ +[source,sql] +---- +select 14 + date '2022-03-15' AS "result"; +---- + +This produces the same result: 14 days after '`2022-03-15`' is '`2022-03-29`': + +[source,sql] +---- + result +------------ + 2022-03-29 +---- + +== DATE {plus} INTERVAL + +Add a specified interval to a date. + +Example: + +[source,sql] +---- +select date '2022-03-15' + interval '3 months' as "result"; +---- + +The result is the date three months after '`2022-03-15`': + +[source,sql] +---- + result +---------------------------- + 2022-06-15 00:00:00.000000 +---- + +== DATE - INTEGER + +Subtract a certain number of days from a date. + +Example: + +[source,sql] +---- +select date '2022-03-15' - 7 as "result"; +---- + +The result is the date 7 days before '`2022-03-15`': + +[source,sql] +---- + result +------------ + 2022-03-08 +---- + +== DATE - INTERVAL + +Subtract a specified interval from a date. + +Example: + +[source,sql] +---- +select date '2022-03-15' - interval '2 hour' as "result"; +---- + +The result is the timestamp two hours before '`2022-03-15`': + +[source,sql] +---- + result +---------------------------- + 2022-03-14 22:00:00.000000 +---- + +== DATE - DATE + +Subtract dates. + +Example: + +[source,sql] +---- +select date '2023-03-15' - date '2023-01-10' as "result"; +---- + +The number of days elapsed between '`2023-03-15`' and '`2023-01-10`' is 64 days. + +[source,sql] +---- + result +-------- + 64 +---- + +== DATE {plus} TIME + +Add a time-of-day to a date. + +Example: + +[source,sql] +---- +select date '2010-05-20' + time '02:00' as "result"; +---- + +The result is a timestamp combining the date and time: + +[source,sql] +---- + result +---------------------------- + 2010-05-20 02:00:00.000000 +---- + +== TIME {plus} INTERVAL + +Add a certain interval to a given time. 
+ +Example: + +[source,sql] +---- +select time '12:30' + interval '1 hour' as "result"; +---- + +The result is the time one hour after '`12:30`': + +[source,sql] +---- + result +----------------- + 13:30:00.000000 +---- + +== TIME - INTERVAL + +Subtract a specified interval from a given time. + +Example: + +[source,sql] +---- +select time '18:45' - interval '45 minutes' as "result"; +---- + +The result is the time 18:00: + +[source,sql] +---- + result +----------------- + 18:00:00.000000 +---- + +== TIME - TIME + +Get a time difference by subtracting one time from another. + +Example: + +[source,sql] +---- +select time '10:00' - TIME '08:20' as "result"; +---- + +In this example, the time difference between the two provided times is 1 hour and 40 minutes. + +[source,sql] +---- + result +----------------- + 01:40:00.000000 +---- + +== TIMESTAMP {plus} INTERVAL + +Add a timestamp and an interval. + +Example: + +[source,sql] +---- +select timestamp '2021-01-05 12:00:00' + interval '5 days' as "result"; +---- + +The result is a new timestamp, 5 days after '`2021-01-05 12:00:00`': + +[source,sql] +---- + result +---------------------------- + 2021-01-10 12:00:00.000000 +---- + +== TIMESTAMP - INTERVAL + +Subtract an interval from a timestamp. + +Example: + +[source,sql] +---- +select timestamp '2022-01-04 12:00:00' - interval '3 days' as "result"; +---- + +In this example, it subtracts 3 days from '`2022-01-04 12:00:00`'. + +[source,sql] +---- + result +---------------------------- + 2022-01-01 12:00:00.000000 +---- + +== TIMESTAMP - TIMESTAMP + +Get an interval by subtracting one timestamp from another. + +Example: + +[source,sql] +---- +select timestamp '2022-01-05 18:30:00' - timestamp '2022-01-01 12:00:00' as "result"; +---- + +It gives the interval between the two timestamps, 102 hours and 30 minutes. + +[source,sql] +---- + result +------------------ + 102:30:00.000000 +---- + +== INTERVAL {plus} INTERVAL + +Add intervals. 
+ +Example: + +[source,sql] +---- +select interval '2 months 2 days' + interval '6 days' as "result"; +---- + +It adds 6 days to 2 days, resulting in a total of 2 months and 8 days. + +[source,sql] +---- + result +--------------- + 2 mons 8 days +---- + +== INTERVAL - INTERVAL + +Subtract intervals. + +Example: + +[source,sql] +---- +select interval '2 months' - interval '20 days' as "result"; +---- + +It subtracts 20 days from 2 months. + +[source,sql] +---- + result +----------------- + 2 mons -20 days +---- + +== INTERVAL * INTEGER + +Multiply an interval by an integer. + +Example: + +[source,sql] +---- +select interval '2 hours' * 3 as "result"; +---- + +It multiplies '`2 hours`' by 3, the result is 6 hours. + +[source,sql] +---- + result +----------------- + 06:00:00.000000 +---- + +== INTERVAL * DOUBLE PRECISION + +Multiply an interval by a scalar. + +Example: + +[source,sql] +---- +select interval '2 hours' * 1.5 as "result"; +---- + +It multiplies '`2 hours`' by 1.5, and returns 3 hours. + +[source,sql] +---- + result +----------------- + 03:00:00.000000 +---- + +== INTERVAL / NUMBER + +Divide an interval by an integer or scalar. + +=== Divide by an integer + +[source,sql] +---- +select interval '1 hour' / 2 as "result"; +---- + +It divides '`1 hour`' by 2, and returns 30 minutes. + +[source,sql] +---- + result +----------------- + 00:30:00.000000 +---- + +=== Divide by a scalar + +[source,sql] +---- +select interval '2 hours' / 1.5 as "result"; +---- + +It divides '`2 hours`' by 1.5, and returns 1 hour 20 minutes. 
+ +[source,sql] +---- + result +----------------- + 01:20:00.000000 +---- diff --git a/modules/reference/pages/sql/sql-data-types/time-type/time.adoc b/modules/reference/pages/sql/sql-data-types/time-type/time.adoc new file mode 100644 index 000000000..8f825d1ca --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/time-type/time.adoc @@ -0,0 +1,68 @@ += Time +:description: The TIME data type in Redpanda SQL stores time values without any date information. +:page-topic-type: reference + +The `TIME` data type in Redpanda SQL stores time values without any date information. It represents a specific time of day, independent of any time zone or date. + +== Format + +The format for the TIME data type is as follows: + +[source,sql] +---- +HH:MM:SS[.SSSSSS] +---- + +* `HH`: One or two-digit hour (valid values from 00 to 23). +* `MM`: One or two-digit minutes (valid values from 00 to 59). +* `SS`: One or two-digit seconds (valid values from 00 to 59). +* `[.SSSSSS]` : Optional fractional seconds, with up to six decimal places (microsecond precision). + +== Examples + +=== Create a schedule table + +The following example creates a table to manage employee schedules, containing their names and the time they are scheduled to start work. The `start_time` column uses the `TIME` data type. + +[source,sql] +---- +CREATE TABLE employee_schedule ( + employee_name TEXT, + start_time TIME +); + +INSERT INTO employee_schedule (employee_name, start_time) +VALUES +('John Doe', '08:30:00'), +('Jane Smith', '09:00:00'), +('Michael Johnson', '10:15:00'); +---- + +The table has been successfully created after executing the query: + +[source,sql] +---- +COMPLETE +INSERT 0 3 +---- + +=== View the employee schedule + +To view all employee schedules in the `employee_schedule` table, use the `SELECT` statement. 
+ +[source,sql] +---- +SELECT * FROM employee_schedule; +---- + +The output displays the employee names and their corresponding scheduled start times: + +[source,sql] +---- + employee_name | start_time +-----------------+----------------- + John Doe | 08:30:00.000000 + Jane Smith | 09:00:00.000000 + Michael Johnson | 10:15:00.000000 +(3 rows) +---- diff --git a/modules/reference/pages/sql/sql-data-types/timestamp-with-time-zone.adoc b/modules/reference/pages/sql/sql-data-types/timestamp-with-time-zone.adoc new file mode 100644 index 000000000..3bc7ba95c --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/timestamp-with-time-zone.adoc @@ -0,0 +1,151 @@ += Timestamp with Time Zone +:description: The timestamp with time zone data type stores date and time values and processes time zone information during operations. +:page-topic-type: reference + +Redpanda SQL provides two data types for handling timestamps: + +* xref:reference:sql/sql-data-types/timestamp-without-time-zone.adoc[Timestamp without time zone]: Stores both date and time values. +* Timestamp with time zone: Stores date and time values and processes time zone information during operations. +** During an `INSERT` operation, the time zone is ignored. The date and time are stored without considering the time zone. +** During a `SELECT` operation, the time zone information from the user's session is also ignored. The data is returned exactly as it is stored without adjusting the time zone. + +[NOTE] +==== +All user sessions have a local timezone associated with them, affecting how `timestamp with time zone` values are displayed. The timezone information is not stored in the database. Every time a user requests a value of this type, Redpanda SQL converts from UTC to the user's local timezone before displaying it. +==== + +[NOTE] +==== +Redpanda SQL relies on timezone information served by the host machine's operating system.
It must be up-to-date to ensure correct timestamp conversions, date calculations, and compliance with regional time changes such as daylight saving adjustments. +==== + +== Format + +The `timestamp with time zone` data type has the following format: + +[source,sql] +---- +YYYY-MM-DD HH:MM:SS.SSSSSS+TZ +---- + +* `YYYY`: Four-digit year. +* `MM`: One or two-digit month. +* `DD`: One or two-digit day. +* `HH`: One or two-digit hour (valid values from 00 to 23). +* `MM`: One or two-digit minutes (valid values from 00 to 59). +* `SS`: One or two-digit seconds (valid values from 00 to 59). +* `.SSSSSS`: Optional fractional digits, up to six (microsecond precision). +* `+TZ`: Time zone offset in the format +/-HH:MM (for example, +05:30, -08:00). + +== Examples + +=== Create a table + +The following example creates a table named `events_log` that contains an event name column, a timestamp without time zone column, and a timestamp with time zone column. The values in the `event_timestamp_tz` column are in the "Europe/Moscow" timezone (`+03:00`). + +[source,sql] +---- +CREATE TABLE events_log ( + event_name TEXT, + event_timestamp TIMESTAMP WITHOUT TIME ZONE, + event_timestamp_tz TIMESTAMP WITH TIME ZONE +); +INSERT INTO events_log (event_name, event_timestamp, event_timestamp_tz) +VALUES + ('Event 1', '2023-07-27 12:30:00', '2023-07-27 12:30:00+03:00'), + ('Event 2', '2023-07-27 08:45:00', '2023-07-27 08:45:00+03:00'), + ('Event 3', '2023-07-27 20:15:00', '2023-07-27 20:15:00+03:00'); +---- + +The table has been successfully created after executing the query: + +[source,sql] +---- +COMPLETE +INSERT 0 3 +---- + +=== Display the table + +Run the `SELECT` statement to get all records of the table: + +[source,sql] +---- +SELECT event_timestamp, event_timestamp_tz +FROM events_log; +---- + +This returns the following result. Notice that the `event_timestamp_tz` values are converted to the UTC time zone.
+ +[source,sql] +---- + event_timestamp | event_timestamp_tz +----------------------------+--------------------------------- + 2023-07-27 12:30:00.000000 | 2023-07-27 09:30:00.000000+0000 + 2023-07-27 08:45:00.000000 | 2023-07-27 05:45:00.000000+0000 + 2023-07-27 20:15:00.000000 | 2023-07-27 17:15:00.000000+0000 +(3 rows) +---- + +=== Order table by timestamp + +To sort the events based on the `event_timestamp` column and display the corresponding UTC in the `event_timestamp_tz` column, run the query: + +[source,sql] +---- +SELECT + event_timestamp, + event_timestamp_tz, + event_timestamp AT TIME ZONE 'UTC' AS utc_time +FROM + events_log +ORDER BY + event_timestamp; +---- + +This query retrieves the `event_timestamp` and `event_timestamp_tz` columns and calculates the corresponding UTC time using the `AT TIME ZONE 'UTC'` operator. + +The results are ordered based on the `event_timestamp` column, producing a sorted list of events with their corresponding local and UTC times. + +[source,sql] +---- + event_timestamp | event_timestamp_tz | utc_time +----------------------------+---------------------------------+--------------------------------- + 2023-07-27 08:45:00.000000 | 2023-07-27 05:45:00.000000+0000 | 2023-07-27 08:45:00.000000+0000 + 2023-07-27 12:30:00.000000 | 2023-07-27 09:30:00.000000+0000 | 2023-07-27 12:30:00.000000+0000 + 2023-07-27 20:15:00.000000 | 2023-07-27 17:15:00.000000+0000 | 2023-07-27 20:15:00.000000+0000 +(3 rows) +---- + +== AT TIME ZONE operator + +The `AT TIME ZONE` operator in timestamp with time zone converts the given timestamp with time zone to the new time zone, with no time zone designation. + +=== Syntax + +[source,sql] +---- +SELECT TIMESTAMP WITH TIME ZONE 'timestamp' AT TIME ZONE 'TIME_ZONE'; +---- + +* `timestamp`: The date and time value with the time zone. +* `TIME_ZONE`: The target time zone to which Redpanda SQL converts the timestamp. The user's time zone is fixed to UTC. 
+ +=== Example + +In this example, a specified timestamp with time zone is converted into the UTC timezone. + +[source,sql] +---- +SELECT TIMESTAMP WITH TIME ZONE '2023-03-04 10:29:90-05' AT TIME ZONE 'UTC'; +---- + +The result is a timestamp without a time zone: + +[source,sql] +---- + timezone +---------------------------- + 2023-03-04 15:30:30.000000 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-data-types/timestamp-without-time-zone.adoc b/modules/reference/pages/sql/sql-data-types/timestamp-without-time-zone.adoc new file mode 100644 index 000000000..6c7c4eb0f --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/timestamp-without-time-zone.adoc @@ -0,0 +1,210 @@ += Timestamp Without Time Zone +:description: The timestamp data type stores time and date values without a time zone. +:page-topic-type: reference + +== Overview + +The timestamp data type stores time and date values without a time zone. It represents a fixed time, independent of any time zone or applied globally. + +== Format + +[source,sql] +---- +YYYY-MM-DD [HH:MM:SS[.SSSSSS]] +---- + +* `YYYY`: Four-digit year. +* `MM`: One or two-digit month. +* `DD`: One or two-digit day. +* `HH`: One or two-digit hour (valid values from 00 to 23). +* `MM`: One or two-digit minutes (valid values from 00 to 59). +* `SS`: One or two-digit seconds (valid values from 00 to 59). +* `[.SSSSSS]`: Up to six fractional digits (microsecond precision). + +[NOTE] +==== +Fractional digits are the digits after the decimal point (`.`). +==== + +== Examples + +=== Create a table + +The following example creates a `visitors` table to store visitor data in an office building. It stores the visitor's name, the purpose of the visit, the company, and the visit date and time, which uses the `TIMESTAMP WITHOUT TIME ZONE` data type.
+ +[source,sql] +---- +CREATE TABLE visitors ( + visitorName TEXT, + visitPurp TEXT, + visitComp TEXT, + visitDate TIMESTAMP WITHOUT TIME ZONE +); +INSERT INTO visitors (visitorName, visitPurp, visitComp, visitDate) +VALUES + ('Peter', 'Interview', 'Apple', '2022-01-10 09:12:40'), + ('Will', 'Meeting', 'McKesson', '2022-01-29 11:28:02'), + ('Max', 'Meeting', 'McKesson', '2022-02-11 10:19:10'), + ('Dustin', 'Meeting', 'CVS Health', '2022-03-18 14:24:08'), + ('Lizzy', 'Meeting', 'CVS Health', '2022-04-23 13:10:09'), + ('Evy', 'Interview', 'Apple', '2022-05-01 08:45:50'); +---- + +The `visitors` table has been successfully created after executing the query: + +[source,sql] +---- +COMPLETE +INSERT 0 6 +---- + +=== Display the table + +Run the `SELECT` statement to get all records of the `visitors` table: + +[source,sql] +---- +SELECT * FROM visitors; +---- + +This returns the following result: + +[source,sql] +---- ++--------------+--------------+---------------+-----------------------+ +| visitorName | visitPurp | visitComp | visitDate | ++--------------+--------------+---------------+-----------------------+ +| Peter | Interview | Apple | 2022-01-10 09:12:40 | +| Will | Meeting | McKesson | 2022-01-29 11:28:02 | +| Max | Meeting | McKesson | 2022-02-11 10:19:10 | +| Dustin | Meeting | CVS Health | 2022-03-18 14:24:08 | +| Lizzy | Meeting | CVS Health | 2022-04-23 13:10:09 | +| Evy | Interview | Apple | 2022-05-01 08:45:50 | ++--------------+--------------+---------------+-----------------------+ +---- + +=== Look for a specific timestamp + +The following example retrieves records with a specified timestamp: + +[source,sql] +---- +SELECT * FROM visitors +WHERE visitDate = '2022-04-23 13:10:09'; +---- + +The query returns the following results: + +[source,sql] +---- ++--------------+--------------+---------------+-----------------------+ +| visitorName | visitPurp | visitComp | visitDate | ++--------------+--------------+---------------+-----------------------+ +| 
Lizzy | Meeting | CVS Health | 2022-04-23 13:10:09 | ++--------------+--------------+---------------+-----------------------+ +---- + +=== Insert a value that exceeds the standard format + +The time portion of a timestamp has a standard format; for example, seconds are only valid for values from 00 to 59. + +The following example inserts a new record into the `visitors` table with a seconds value of `60`, which exceeds the valid range for seconds. + +[source,sql] +---- +INSERT INTO visitors (visitorName, visitPurp, visitComp, visitDate) +VALUES + ('Jolly', 'Survey', 'Apple', '2022-01-10 09:12:60'); +---- + +[source,sql] +---- +INSERT 0 1 + +Query returned successfully in 135 msec. +---- + +Verify the result by running the following `select` statement: + +[source,sql] +---- +SELECT * FROM visitors +WHERE visitorName = 'Jolly'; +---- + +The seconds are displayed as `00` because the value `60` rolls over and adds one minute to the minutes value. + +[source,sql] +---- ++--------------+--------------+---------------+-----------------------+ +| visitorName | visitPurp | visitComp | visitDate | ++--------------+--------------+---------------+-----------------------+ +| Jolly | Survey | Apple | 2022-01-10 09:13:00 | ++--------------+--------------+---------------+-----------------------+ +---- + +== AT TIME ZONE operator + +The `AT TIME ZONE` operator converts the input timestamp to the target time zone specified in the query. Additionally, the timestamp you enter is always presented in the user's local timezone (currently set as UTC). + +[WARNING] +==== +The result type of this operator differs from the input: it produces a timestamp with a time zone. +==== + +=== Syntax + +To use the `AT TIME ZONE` operator, you can follow this syntax: + +[source,sql] +---- +SELECT TIMESTAMP 'input_timestamp' AT TIME ZONE 'TIME_ZONE'; +---- + +Here's what each element means: + +* `input_timestamp`: This represents the date and time value you want to convert. The user's time zone is fixed to UTC.
+* `TIME_ZONE`: The target time zone to which Redpanda SQL converts the timestamp. + +=== Example 1 + +Suppose you have a timestamp and want to convert it into the MST time zone: + +[source,sql] +---- +SELECT TIMESTAMP '2001-02-16 10:28:30' AT TIME ZONE 'MST'; +---- + +The result is a timestamp with the time zone adjusted to MST: + +[source,sql] +---- + f +--------------------------------- + 2001-02-16 17:28:30.000000+0000 +(1 row) +---- + +=== Example 2 + +Using the xref:reference:sql/sql-data-types/timestamp-without-time-zone.adoc[visitors] table, the following query retrieves a list of visit dates in the MST time zone: + +[source,sql] +---- +SELECT visitDate, visitDate AT TIME ZONE 'MST' as "visitDateMST" FROM visitors; +---- + +This query returns a list of two columns: `visitDate` displays the timestamps without a time zone, and `visitDateMST` stores the timestamps converted to the MST time zone. + +[source,sql] +---- + visitdate | visitDateMST +----------------------------+--------------------------------- + 2022-01-10 09:12:40.000000 | 2022-01-10 16:12:40.000000+0000 + 2022-01-29 11:28:02.000000 | 2022-01-29 18:28:02.000000+0000 + 2022-02-11 10:19:10.000000 | 2022-02-11 17:19:10.000000+0000 + 2022-03-18 14:24:08.000000 | 2022-03-18 21:24:08.000000+0000 + 2022-04-23 13:10:09.000000 | 2022-04-23 20:10:09.000000+0000 + 2022-05-01 08:45:50.000000 | 2022-05-01 15:45:50.000000+0000 +(6 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/avg.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/avg.adoc new file mode 100644 index 000000000..64a0d336e --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/avg.adoc @@ -0,0 +1,145 @@ += AVG +:description: The AVG() function calculates the average value of records. +:page-topic-type: reference + +The `AVG()` function calculates the average value of records. 
The supported input and return types are listed in the following table: + +[cols=",",options="header",] +|=== +|Input type |Return type +|`INTEGER` |`DOUBLE PRECISION` +|`BIGINT` |`DOUBLE PRECISION` +|`REAL` |`DOUBLE PRECISION` +|`DOUBLE PRECISION` |`DOUBLE PRECISION` +|=== + +[NOTE] +==== +If the input type is 32-bit, then the result is 64-bit +==== +Special cases: Returns NaN if the input contains a NaN. + +== Examples + +This example uses an `orders` table that stores details of the purchase transactions: + +[source,sql] +---- +CREATE TABLE orders ( + orderid int, + custname text, + orderproduct text, + ordertotal real +); +INSERT INTO orders (orderid, custname, orderproduct, ordertotal) +VALUES +(9557411, 'Maya', 'Jeans', 10.5), +(9557421, 'Aaron', 'T-Shirt', 9.2), +(9557451, 'Alex', 'Hat', 10.8), +(9557311, 'Will', 'Hat', 8.5), +(9557321, 'Will', 'T-Shirt', 12.15), +(9557351, 'Maya', 'T-Shirt', 9.5), +(9557221, 'Maya', 'Jeans', 11.02), +(9557251, 'Alex', 'Jeans', 11.09), +(9557231, 'Aaron', 'Hat', 14.56), +(9557281, 'Aaron', 'Hat', 12.15), +(9557291, 'Will', 'T-Shirt', 13.1); +---- + +[source,sql] +---- +SELECT * FROM orders; +---- + +This query shows the following table: + +[source,sql] +---- ++----------+-----------+---------------+-------------+ +| orderid | custname | orderproduct | ordertotal | ++----------+-----------+---------------+-------------+ +| 9557411 | Maya | Jeans | 10.5 | +| 9557421 | Aaron | T-Shirt | 9.2 | +| 9557451 | Alex | Hat | 10.8 | +| 9557311 | Will | Hat | 8.5 | +| 9557321 | Will | T-Shirt | 12.15 | +| 9557351 | Maya | T-Shirt | 9.5 | +| 9557221 | Maya | Jeans | 11.02 | +| 9557251 | Alex | Jeans | 11.09 | +| 9557231 | Aaron | Hat | 14.56 | +| 9557281 | Aaron | Hat | 12.15 | +| 9557291 | Will | T-Shirt | 13.1 | ++----------+-----------+---------------+-------------+ +---- + +=== AVG() with a single expression + +The first example calculates the average amount of all orders that customers have paid: + +[source,sql] +---- +SELECT 
AVG(ordertotal) AS "Order Total Average" +FROM orders; +---- + +This returns the following output: + +[source,sql] +---- ++---------------------+ +| Order Total Average | ++---------------------+ +| 11.142727331681685 | ++---------------------+ +---- + +=== AVG() with a GROUP BY clause + +The following example uses the `AVG()` function and `GROUP BY` clause to calculate the average amount paid by each customer: + +* First, the `GROUP BY` clause divides orders into groups based on customers. +* Then, the `AVG` function is applied to each group. + +[source,sql] +---- +SELECT custname AS "Customer", AVG (ordertotal) AS "Total Price Average" +FROM orders +GROUP BY custname; +---- + +The query returns: + +[source,sql] +---- ++-----------+----------------------+ +| Customer | Total Price Average | ++-----------+----------------------+ +| Aaron | 11.96999994913737 | +| Alex | 10.945000171661377 | +| Will | 11.25 | +| Maya | 10.34000015258789 | ++-----------+----------------------+ +---- + +You can use a cast such as `::NUMERIC(10,2)` to round the result to two decimal places: + +[source,sql] +---- +SELECT custname AS "Customer", AVG (ordertotal)::NUMERIC(10,2) AS "Total Price Average" +FROM orders +GROUP BY custname; +---- + +The results are rounded to two decimal places: + +[source,sql] +---- ++-----------+----------------------+ +| Customer | Total Price Average | ++-----------+----------------------+ +| Aaron | 11.97 | +| Alex | 10.95 | +| Will | 11.25 | +| Maya | 10.34 | ++-----------+----------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/bool-and.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/bool-and.adoc new file mode 100644 index 000000000..a6eeab627 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/bool-and.adoc @@ -0,0 +1,111 @@ += BOOL_AND +:description: The BOOL_AND() function calculates all the boolean values in the aggregated group,
which will have these results: +:page-topic-type: reference + +The `BOOL_AND()` function calculates all the boolean values in the aggregated group, which will have these results: + +* `true` if all the values are `true` for every row. +* `false` if at least one row in the group is `false`. + +The input and the return type must be in `BOOL`. + +[NOTE] +==== +`NULL` values are not aggregated, so it returns `NULL` if there are zero input rows. +==== + +== Examples + +This example uses a payment table that stores details of the orders, whether the order has been paid or unpaid by the customer: + +[source,sql] +---- +CREATE TABLE payment ( + orderid int, + custname text, + orderproduct text, + ordertotal real, + paid boolean +); +INSERT INTO payment (orderid, custname, orderproduct, ordertotal, paid) +VALUES +(9557411, 'Maya', 'Jeans', 10.5, true), +(9557421, 'Aaron', 'T-Shirt', 9.2, true), +(9557451, 'Alex', 'Hat', 10.8, true), +(9557311, 'Will', 'Hat', 8.5, true), +(9557321, 'Will', 'T-Shirt', 12.15, true), +(9557351, 'Maya', 'T-Shirt', 9.5, true), +(9557221, 'Maya', 'Jeans', 11.02, true), +(9557251, 'Alex', 'Jeans', 11.09, true), +(9557231, 'Aaron', 'Hat', 14.56, false), +(9557281, 'Aaron', 'Hat', 12.15, true), +(9557291, 'Will', 'T-Shirt', 13.1, true); +---- + +[source,sql] +---- +SELECT * FROM payment; +---- + +This query shows the following table: + +[source,sql] +---- ++----------+-----------+---------------+-------------+-------+ +| orderid | custname | orderproduct | ordertotal | paid | ++----------+-----------+---------------+-------------+-------+ +| 9557411 | Maya | Jeans | 10.5 | t | +| 9557421 | Aaron | T-Shirt | 9.2 | t | +| 9557451 | Alex | Hat | 10.8 | t | +| 9557311 | Will | Hat | 8.5 | t | +| 9557321 | Will | T-Shirt | 12.15 | t | +| 9557351 | Maya | T-Shirt | 9.5 | t | +| 9557221 | Maya | Jeans | 11.02 | t | +| 9557251 | Alex | Jeans | 11.09 | t | +| 9557231 | Aaron | Hat | 14.56 | f | +| 9557281 | Aaron | Hat | 12.15 | t | +| 9557291 | Will | 
T-Shirt | 13.1 | t | ++----------+-----------+---------------+-------------+-------+ +---- + +=== `BOOL_AND` with a false result + +To find out if all customers have paid for their orders, run the query: + +[source,sql] +---- +SELECT BOOL_AND(paid) AS "final_result" FROM payment; +---- + +In the `BOOL_AND` function, if there is at least one `FALSE` value, the overall result is `FALSE`. The output shows that there is an order that hasn't been paid. + +[source,sql] +---- ++--------------+ +| final_result | ++--------------+ +| f | ++--------------+ +---- + +=== `BOOL_AND` with a true result + +To find out if Maya has paid for her orders, run the query: + +[source,sql] +---- +SELECT BOOL_AND(paid) AS Maya_Paid +FROM payment +WHERE custname ='Maya'; +---- + +In the `BOOL_AND` function, if all values are `TRUE`, then the overall result is `TRUE`. The output shows that Maya has paid all her orders. + +[source,sql] +---- ++-----------+ +| maya_paid | ++-----------+ +| t | ++-----------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/bool-or.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/bool-or.adoc new file mode 100644 index 000000000..3864fb744 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/bool-or.adoc @@ -0,0 +1,111 @@ += BOOL_OR +:description: The BOOL_OR() function calculates all the boolean values in the aggregated group, which will have these results: +:page-topic-type: reference + +The `BOOL_OR()` function calculates all the boolean values in the aggregated group, which will have these results: + +* `false` if all the values are `false` for every row. +* `true` if at least one row in the group is true. + +The input and the return type must be in `BOOL`. + +[NOTE] +==== +`NULL` values are not aggregated, so it returns `NULL` if there are zero input rows. 
+==== + +== Examples + +This example uses a `payment` table that stores order details, including whether each order has been paid by the customer: + +[source,sql] +---- +CREATE TABLE payment ( + orderid int, + custname text, + orderproduct text, + ordertotal real, + paid boolean +); +INSERT INTO payment (orderid, custname, orderproduct, ordertotal, paid) +VALUES +(9557411, 'Maya', 'Jeans', 10.5, false), +(9557421, 'Aaron', 'T-Shirt', 9.2, false), +(9557451, 'Alex', 'Hat', 10.8, false), +(9557311, 'Will', 'Hat', 8.5, true), +(9557321, 'Will', 'T-Shirt', 12.15, false), +(9557351, 'Maya', 'T-Shirt', 9.5, true), +(9557221, 'Maya', 'Jeans', 11.02, false), +(9557251, 'Alex', 'Jeans', 11.09, false), +(9557231, 'Aaron', 'Hat', 14.56, false), +(9557281, 'Aaron', 'Hat', 12.15, false), +(9557291, 'Will', 'T-Shirt', 13.1, false); +---- + +[source,sql] +---- +SELECT * FROM payment; +---- + +This query shows the following table: + +[source,sql] +---- ++----------+-----------+---------------+-------------+--------+ +| orderid | custname | orderproduct | ordertotal | paid | ++----------+-----------+---------------+-------------+--------+ +| 9557411 | Maya | Jeans | 10.5 | f | +| 9557421 | Aaron | T-Shirt | 9.2 | f | +| 9557451 | Alex | Hat | 10.8 | f | +| 9557311 | Will | Hat | 8.5 | t | +| 9557321 | Will | T-Shirt | 12.15 | f | +| 9557351 | Maya | T-Shirt | 9.5 | t | +| 9557221 | Maya | Jeans | 11.02 | f | +| 9557251 | Alex | Jeans | 11.09 | f | +| 9557231 | Aaron | Hat | 14.56 | f | +| 9557281 | Aaron | Hat | 12.15 | f | +| 9557291 | Will | T-Shirt | 13.1 | f | ++----------+-----------+---------------+-------------+--------+ +---- + +=== `BOOL_OR` with a true result + +To find out if at least one order has been paid, run the query: + +[source,sql] +---- +SELECT BOOL_OR(paid) AS "final_result" FROM payment; +---- + +If there is at least one `TRUE` value, the overall result is `TRUE`.
The output shows that at least one order has been paid, regardless of the other unpaid orders. + +[source,sql] +---- ++--------------+ +| final_result | ++--------------+ +| t | ++--------------+ +---- + +=== `BOOL_OR` with a false result + +To find out if Aaron has paid for any of his orders, run the query: + +[source,sql] +---- +SELECT BOOL_OR(paid) AS aaron_paid +FROM payment +WHERE custname ='Aaron'; +---- + +If all values are `FALSE`, then the overall result is `FALSE`. The output shows that Aaron hasn't paid for any of his orders. + +[source,sql] +---- ++------------+ +| aaron_paid | ++------------+ +| f | ++------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/count.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/count.adoc new file mode 100644 index 000000000..9f3d8f5cb --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/count.adoc @@ -0,0 +1,138 @@ += COUNT +:description: The COUNT() function retrieves the number of records that match a specific condition. +:page-topic-type: reference + +The `COUNT()` function retrieves the number of records that match a specific condition. It works with any data type supported by Redpanda SQL and returns a `BIGINT`. + +[NOTE] +==== +The output will indicate the total number of rows in a table, regardless of the input types. 
+==== + +== Examples + +This example uses an orders table that stores details of the purchase transactions: + +[source,sql] +---- +CREATE TABLE orders ( + orderid int, + custname text, + orderproduct text, + ordertotal real +); +INSERT INTO orders (orderid, custname, orderproduct, ordertotal) +VALUES +(9557411, 'Maya', 'Jeans', 10.5), +(9557421, 'Aaron', 'T-Shirt', 9.2), +(9557451, 'Alex', 'Hat', 10.8), +(9557311, 'Will', 'Hat', 8.5), +(9557321, 'Will', 'T-Shirt', 12.15), +(9557351, 'Maya', 'T-Shirt', 9.5), +(9557221, 'Maya', 'Jeans', 11.02), +(9557251, 'Alex', 'Jeans', 11.09), +(9557231, 'Aaron', 'Hat', 14.56), +(9557281, 'Aaron', 'Hat', 12.15), +(9557291, 'Will', 'T-Shirt', 13.1); +---- + +[source,sql] +---- +SELECT * FROM orders; +---- + +This query shows the following table: + +[source,sql] +---- ++----------+-----------+---------------+-------------+ +| orderid | custname | orderproduct | ordertotal | ++----------+-----------+---------------+-------------+ +| 9557411 | Maya | Jeans | 10.5 | +| 9557421 | Aaron | T-Shirt | 9.2 | +| 9557451 | Alex | Hat | 10.8 | +| 9557311 | Will | Hat | 8.5 | +| 9557321 | Will | T-Shirt | 12.15 | +| 9557351 | Maya | T-Shirt | 9.5 | +| 9557221 | Maya | Jeans | 11.02 | +| 9557251 | Alex | Jeans | 11.09 | +| 9557231 | Aaron | Hat | 14.56 | +| 9557281 | Aaron | Hat | 12.15 | +| 9557291 | Will | T-Shirt | 13.1 | ++----------+-----------+---------------+-------------+ +---- + +=== `COUNT()` with a single expression + +This example returns the number of all orders in the orders table: + +[source,sql] +---- +SELECT COUNT(*) FROM orders; +---- + +The query returns: + +[source,sql] +---- ++-------+ +| count | ++-------+ +| 11 | ++-------+ +---- + +=== `COUNT()` with a `GROUP BY` clause + +This example will combine the `COUNT()` function and the `GROUP BY` clause. + +* The `GROUP BY` clause groups the orders based on the customer's name. +* The `COUNT()` function counts the orders for each customer. 
+ +[source,sql] +---- +SELECT custname, COUNT (orderid) +FROM orders +GROUP BY custname; +---- + +The query returns: + +[source,sql] +---- ++-----------+--------+ +| custname | count | ++-----------+--------+ +| Aaron | 3 | +| Alex | 2 | +| Will | 3 | +| Maya | 3 | ++-----------+--------+ +---- + +=== `COUNT()` with a `HAVING` clause + +This example combines the `COUNT()` function and the `HAVING` clause to apply a specific condition to find customers who have made more than two orders: + +[source,sql] +---- +SELECT custname, COUNT (orderid) +FROM orders +GROUP BY custname +HAVING COUNT (orderid) > 2; +---- + +* The `GROUP BY` clause groups the orders based on the customer's name. +* The `HAVING` clause will filter only customers with more than two order IDs. +* The `COUNT()` function counts the orders for each customer. + +[source,sql] +---- ++-----------+-------+ +| custname | count | ++-----------+-------+ +| Aaron | 3 | +| Will | 3 | +| Maya | 3 | ++-----------+-------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/distinct.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/distinct.adoc new file mode 100644 index 000000000..e48a01fcb --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/distinct.adoc @@ -0,0 +1,210 @@ += DISTINCT +:description: When using aggregation functions, they can contain the DISTINCT keyword. +:page-topic-type: reference + +When using aggregation functions, they can contain the `DISTINCT` keyword. It acts as a qualifier for them, to ensure that only unique values are being processed. Syntax: + +[source,sql] +---- +aggregation function (DISTINCT expression [clause] ...) ... 
+---- + +`DISTINCT` keyword can be combined with the following aggregate functions: + +* `AVG()` +* `COUNT()` +* `MAX()` +* `MIN()` +* `SUM()` + +All functions listed in this section operate on the same input and return types, that are supported by their counterparts without any qualifiers. They can be grouped without any limitations, provided that they utilise a single `DISTINCT` keyword. + +== Examples + +This section focuses on a few examples that showcase sample usage of these concepts. They are based on creation of the following tables: + +[source,sql] +---- +CREATE TABLE customer ( + customer_id int, + cust_name text +); +INSERT INTO customer + (customer_id, cust_name) +VALUES + (11112, 'Alex'), + (11113, 'Aaron'), + (11114, 'Alice'), + (11115, 'Nina'), + (11116, 'Rosy'), + (11117, 'Martha'), + (11118, 'John'); + +CREATE TABLE rental ( + rental_id int, + rental_date timestamp, + return_date timestamp, + car text, + customer_id int, + total_price int +); +INSERT INTO rental (rental_id, rental_date, return_date, car, customer_id, total_price) +VALUES +(8557411, '2022-04-02 09:10:19', '2022-04-10 10:15:05', 'Audi', 11112, 1400), +(8557421, '2022-04-06 07:00:30', '2022-04-19 07:10:19', 'BMW', 11115, 2000), +(8557451, '2022-04-19 08:00:20', '2022-04-24 08:05:00', 'Cadillac', 11112, 1000), +(8557311, '2022-05-11 09:15:28', '2022-05-18 09:00:18', 'Audi', 11115, 1500), +(8557321, '2022-05-20 10:12:22', '2022-05-28 10:08:48', 'Audi', 11113, 1500), +(8557351, '2022-06-10 12:18:09', '2022-06-20 18:12:23', 'Cadillac', 11114, 1200), +(8557221, '2022-06-17 14:02:02', '2022-06-20 14:17:02', 'Chevrolet', 11112, 1300), +(8557251, '2022-07-12 05:19:49', '2022-07-19 07:15:28', 'Chevrolet', 11116, 1400), +(8557231, '2022-08-09 09:29:08', '2022-08-24 09:30:58', 'Cadillac', 11114, 2000), +(8557291, '2022-08-18 15:15:20', '2022-09-01 15:30:19', 'BMW', 11117, 3000); +---- + +The created tables: + +[source,sql] +---- +SELECT * FROM customer; + ++-------------+-----------+ +| 
customer_id | cust_name | ++-------------+-----------+ +| 11112 | Alex | +| 11113 | Aaron | +| 11114 | Alice | +| 11115 | Nina | +| 11116 | Rosy | +| 11117 | Martha | +| 11118 | John | ++-------------+-----------+ + +SELECT * FROM rental; + ++------------+---------------------+---------------------+-----------+---------------+-------------+ +| rental_id | rental_date | return_date | car | customer_id | total_price | ++------------+---------------------+---------------------+-----------+---------------+-------------+ +| 8557411 | 2022-04-02 09:10:19 | 2022-04-10 10:15:05 | Audi | 11112 | 1400 | +| 8557421 | 2022-04-06 07:00:30 | 2022-04-19 07:10:19 | BMW | 11115 | 2000 | +| 8557451 | 2022-04-19 08:00:20 | 2022-04-24 08:05:00 | Cadillac | 11112 | 1000 | +| 8557311 | 2022-05-11 09:15:28 | 2022-05-18 09:00:18 | Audi | 11115 | 1500 | +| 8557321 | 2022-05-20 10:12:22 | 2022-05-28 10:08:48 | Audi | 11113 | 1500 | +| 8557351 | 2022-06-10 12:18:09 | 2022-06-20 18:12:23 | Cadillac | 11114 | 1200 | +| 8557221 | 2022-06-17 14:02:02 | 2022-06-20 14:17:02 | Chevrolet | 11112 | 1300 | +| 8557251 | 2022-07-12 05:19:49 | 2022-07-19 07:15:28 | Chevrolet | 11116 | 1400 | +| 8557231 | 2022-08-09 09:29:08 | 2022-08-24 09:30:58 | Cadillac | 11114 | 2000 | +| 8557291 | 2022-08-18 15:15:20 | 2022-09-01 15:30:19 | BMW | 11117 | 3000 | ++------------+---------------------+---------------------+-----------+---------------+-------------+ +---- + +=== `DISTINCT` combined with `COUNT` function + +The following example uses `DISTINCT` qualifier combined with `COUNT()` function to calculate the number of unique car brands in rentals: + +[source,sql] +---- +SELECT COUNT (DISTINCT car) AS number_of_car_brands +FROM rental; +---- + +This returns the following output: + +[source,sql] +---- ++----------------------+ +| number_of_car_brands | ++----------------------+ +| 4 | ++----------------------+ +---- + +This example uses the `DISTINCT` qualifier combined with `COUNT()` function to calculate the 
number of rentals by each customer: + +[source,sql] +---- +SELECT c.cust_name AS customer_name, COUNT (DISTINCT r.rental_id) AS rental_count +FROM rental r +JOIN customer c ON r.customer_id = c.customer_id +GROUP BY c.cust_name; +---- + +This calculates the `rental_count` by each `customer_name`: + +[source,sql] +---- ++----------------+--------------+ +| customer_name | rental_count | ++----------------+--------------+ +| Nina | 2 | +| Aaron | 1 | +| Alice | 2 | +| Martha | 1 | +| Alex | 3 | +| Rosy | 1 | ++----------------+--------------+ +---- + +=== `DISTINCT` combined with `MAX()` function + +The following example uses `DISTINCT` qualifier combined with `MAX()` function to find maximum single spending per each customer, dropping any repeated transactions: + +[source,sql] +---- +SELECT c.cust_name AS customer_name, + MAX (DISTINCT r.total_price) AS max_spending +FROM rental r +JOIN customer c ON r.customer_id = c.customer_id +GROUP BY c.cust_name; +---- + +The query returns: + +[source,sql] +---- ++---------------+--------------+ +| customer_name | max_spending | ++---------------+--------------+ +| Martha | 3000 | +| Rosy | 1400 | +| Alex | 1400 | +| Alice | 2000 | +| Nina | 2000 | +| Aaron | 1500 | ++---------------+--------------+ +---- + +=== `DISTINCT` combined with `SUM()` function + +The following example compares the sum of unique revenues versus the sum of all revenues in rental data: + +[source,sql] +---- +SELECT + SUM (DISTINCT r.total_price) AS unique_revenue, + SUM (r.total_price) AS total_revenue +FROM rental r; +---- + +The query returns: + +[source,sql] +---- ++----------------+---------------+ +| unique_revenue | total_revenue | ++----------------+---------------+ +| 11400 | 16300 | ++----------------+---------------+ +---- + +The result may help to understand what is the impact of repeating transactions on total revenue. 
+== Limitations + +There is one use case that is not currently supported: + +* Aggregate functions with the `DISTINCT` keyword used as an operand in an expression, for example: +[source,sql] +---- +SELECT 1 + COUNT(DISTINCT col) FROM table +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/for-max.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/for-max.adoc new file mode 100644 index 000000000..b0e22450a --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/for-max.adoc @@ -0,0 +1,116 @@ += FOR_MAX() +:description: The FOR_MAX() function searches for a maximum in a specific column and returns a value related to that maximum from another column. +:page-topic-type: reference + +The `FOR_MAX()` function searches for a maximum in a specific column and returns a value related to that maximum from another column. + +== Syntax + +[source,sql] +---- +FOR_MAX(metric, value) +---- + +== Arguments + +* `metric`: Must be one of the following data types: `INT`, `LONG`, `FLOAT`, `DOUBLE`, `DATE` or `TIMESTAMP`. +* `value`: Can be any data type except `TEXT`. + +The `FOR_MAX()` function returns `NULL` in the following situations: + +* There are no input rows +* The `metric` column contains only `NULL` values +* The `value` corresponding to the maximum `metric` value is `NULL` + +This function also returns `NaN` (not-a-number) if the input contains a `NaN`. 
+== Examples + +The following examples use a `payments` table that stores customer payment records, including any applied discounts: + +[source,sql] +---- +CREATE TABLE payments ( + paymentid int, + customer_name text, + price real, + discount real +); +INSERT INTO payments (paymentid, customer_name, price, discount) +VALUES +(1, 'Alex', 280.12, 0.1), +(2, NULL, 35.75, NULL), +(3, 'Alex', 45.1, 0.05), +(4, 'Alex', NULL, 0.4), +(5, 'John', NULL, 0.1), +(6, 'Bob', 50.45, 0.07), +(7, 'Bob', 120.5, 0.0); +---- + +To view the `payments` table content, run the query: + +[source,sql] +---- +SELECT * FROM payments; +---- + +[source,sql] +---- ++-----------+---------------+--------+----------+ +| paymentid | customer_name | price | discount | ++-----------+---------------+--------+----------+ +| 2 | | 35.75 | | +| 4 | Alex | | 0.4 | +| 3 | Alex | 45.1 | 0.05 | +| 1 | Alex | 280.12 | 0.1 | +| 6 | Bob | 50.45 | 0.07 | +| 5 | John | | 0.1 | +| 7 | Bob | 120.5 | 0 | ++-----------+---------------+--------+----------+ +---- + +=== `FOR_MAX()` basic usage + +To determine the price associated with the highest discount, run the query: + +[source,sql] +---- +SELECT FOR_MAX(discount, price) AS for_lowest_discount +FROM payments; +---- + +The query returns `NULL` (shown as an empty value) because the row with the highest discount (`0.4`) has a `NULL` price: + +[source,sql] +---- ++---------------------+ +| for_lowest_discount | ++---------------------+ +| | ++---------------------+ +---- + +=== `FOR_MAX()` with `GROUP BY` clause + +This example uses a `GROUP BY` clause to group customers and then uses the `FOR_MAX()` function to get the discount associated with the highest price paid by each customer: + +[source,sql] +---- +SELECT customer_name, FOR_MAX(price, discount) AS discount +FROM payments +GROUP BY customer_name; +---- + +The query returns the following output: + +[source,sql] +---- ++---------------+----------+ +| customer_name | discount | ++---------------+----------+ +| | | +| Bob | 0 | +| Alex | 0.1 | +| John | | ++---------------+----------+ +---- diff --git 
a/modules/reference/pages/sql/sql-functions/aggregate-functions/for-min.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/for-min.adoc new file mode 100644 index 000000000..e7791ceda --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/for-min.adoc @@ -0,0 +1,114 @@ += FOR_MIN() +:description: The FOR_MIN() function searches for a minimum in a specific column and returns a value related to that minimum from another column. +:page-topic-type: reference + +The `FOR_MIN()` function searches for a minimum in a specific column and returns a value related to that minimum from another column. + +== Syntax + +[source,sql] +---- +FOR_MIN(metric, value) +---- + +== Arguments + +* `metric`: Must be one of the following data types: `INT`, `LONG`, `FLOAT`, `DOUBLE`, `DATE` or `TIMESTAMP`. +* `value`: Can be any data type except `TEXT`. + +The `FOR_MIN()` function returns `NULL` in the following situations: + +* There are no input rows +* The `metric` column contains only `NULL` values +* The `value` corresponding to the metric minimum value is `NULL` + +This function also returns `NaN` (not-a-number) if the input contains a `NaN`. 
+ +== Examples + +The following examples use a `payment` table that stores customer payment records, including any applied discounts: + +[source,sql] +---- +CREATE TABLE payments ( + paymentid int, + customer_name text, + price real, + discount real); + +INSERT INTO + payments (paymentid, customer_name, price, discount) +VALUES + (1, 'Alex', 280.12, 0.1), + (2, NULL, 35.75, NULL), + (3, 'Alex', 45.1, 0.05), + (4, 'Alex', NULL, 0.4), + (5, 'John', NULL, 0.1), + (6, 'Bob', 50.45, 0.07), + (7, 'Bob', 120.5, 0.0); +---- + +To view the `payments` table content, run the query: + +[source,sql] +---- +SELECT * FROM payments; +---- + +[source,sql] +---- + paymentid | customer_name | price | discount +-----------+---------------+--------+---------- + 1 | Alex | 280.12 | 0.1 + 2 | | 35.75 | + 3 | Alex | 45.1 | 0.05 + 4 | Alex | | 0.4 + 5 | John | | 0.1 + 6 | Bob | 50.45 | 0.07 + 7 | Bob | 120.5 | 0 +(7 rows) +---- + +=== `FOR_MIN()` basic usage + +To determine the price associated with the lowest discount applied across all payments, run the query: + +[source,sql] +---- +SELECT FOR_MIN(discount, price) AS for_lowest_discount FROM payments; +---- + +The query returns the following output: + +[source,sql] +---- + for_lowest_discount +--------------------- + 120.5 +(1 row) +---- + +=== `FOR_MIN()` with `GROUP BY` clause + +To determine the discount associated with the lowest price paid by each customer, use the `GROUP BY` clause with the `FOR_MIN()` function: + +[source,sql] +---- +SELECT customer_name, + FOR_MIN(price, discount) AS discount +FROM payments +GROUP BY customer_name; +---- + +This query returns the following output: + +[source,sql] +---- +customer_name | discount +---------------+---------- + Bob | 0.07 + Alex | 0.05 + | + John | +(4 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/index.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/index.adoc new file mode 100644 index 000000000..57e401e94 --- /dev/null 
+++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/index.adoc @@ -0,0 +1,29 @@ += Overview +:description: Aggregate functions compute a single result from a set of input values. + +Aggregate functions compute a single result from a set of input values. Redpanda SQL supports the following aggregate functions: + +[width="100%",cols="36%,64%",options="header",] +|=== +|Function Name |Description +|xref:reference:sql/sql-functions/aggregate-functions/sum.adoc[SUM] |Calculates and returns the sum of all values +|xref:reference:sql/sql-functions/aggregate-functions/min.adoc[MIN] |Calculates and returns the minimum value +|xref:reference:sql/sql-functions/aggregate-functions/for-min.adoc[FOR_MIN] |Calculates and returns a value corresponding to the minimum metric in the same row from a set of values +|xref:reference:sql/sql-functions/aggregate-functions/max.adoc[MAX] |Calculates and returns the maximum value +|xref:reference:sql/sql-functions/aggregate-functions/for-max.adoc[FOR_MAX] |Calculates and returns a value corresponding to the maximum metric in the same row from a set of values +|xref:reference:sql/sql-functions/aggregate-functions/avg.adoc[AVG] |Calculates and returns the average value +|xref:reference:sql/sql-functions/aggregate-functions/count.adoc[COUNT] |Counts the number of rows +|xref:reference:sql/sql-functions/aggregate-functions/bool-and.adoc[BOOL_AND] |Calculates the logical AND of all the boolean values in the aggregated group. `FALSE` if at least one of the aggregated rows is `FALSE` +|xref:reference:sql/sql-functions/aggregate-functions/bool-or.adoc[BOOL_OR] |Calculates the logical OR of all the boolean values in the aggregated group. 
`TRUE` if at least one of aggregated rows is `TRUE` +|=== + +[width="100%",cols="36%,64%",options="header",] +|=== +|Function qualifier |Description +|xref:reference:sql/sql-functions/aggregate-functions/distinct.adoc[DISTINCT] |Allows aggregation functions to operate on a distinct set of values within a column +|=== + +[TIP] +==== +You can utilize the aggregate functions with the `GROUP BY` and `HAVING` clauses in the `SELECT` statement. +==== diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/max.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/max.adoc new file mode 100644 index 000000000..256158401 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/max.adoc @@ -0,0 +1,118 @@ += MAX +:description: MAX() is a function that returns the maximum value from a set of records. +:page-topic-type: reference + +`MAX()` is a function that returns the maximum value from a set of records. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +MAX(column_name) +---- + +This function's output data type will always be the same as the input one, however it returns `NULL` if there are no records or input consists of `NULL` values and it also returns `NaN` if the input contains a `NaN`. + +== Examples + +The following examples use a movies table that stores movie details, such as movie's title, category, and IMDb rating. 
+ +[source,sql] +---- +CREATE TABLE movies ( + movieid int, + moviename text, + moviecategory text, + imdbrating real +); +INSERT INTO movies (movieid, moviename, moviecategory, imdbrating) +VALUES +(8557411, 'The Shawshank Redemption', 'Drama', 9.4), +(8557421, 'Life Is Beautiful', 'Romance', 8.4), +(8557451, 'The Godfather', 'Crime', 9.3), +(8557311, 'Prisoners', 'Thriller', 8.5), +(8557321, 'Inception', 'Science Fiction', 9), +(8557351, 'The Dark Knight', 'Action', 9.2), +(8557221, 'Coco', 'Drama', 8.2), +(8557251, 'The Sixth Sense', 'Horror', 8.1), +(8557231, 'Kill Bill: Vol. 1', 'Action', 8.1), +(8557281, 'The Notebook', 'Romance', 7.8), +(8557291, 'Forrest Gump', 'Drama', 8); +---- + +[source,sql] +---- +SELECT * FROM movies; +---- + +The query returns: + +[source,sql] +---- ++---------+--------------------------+-----------------+-------------+ +| movieid | moviename | moviecategory | imdbrating | ++---------+--------------------------+-----------------+-------------+ +| 8557411 | The Shawshank Redemption | Drama | 9.4 | +| 8557421 | Life Is Beautiful | Romance | 8.4 | +| 8557451 | The Godfather | Crime | 9.3 | +| 8557311 | Prisoners | Thriller | 8.5 | +| 8557321 | Inception | Science Fiction | 9 | +| 8557351 | The Dark Knight | Action | 9.2 | +| 8557221 | Coco | Drama | 8.2 | +| 8557251 | The Sixth Sense | Horror | 8.1 | +| 8557231 | Kill Bill: Vol. 
1 | Action | 8.1 | +| 8557281 | The Notebook | Romance | 7.8 | +| 8557291 | Forrest Gump | Drama | 8 | ++---------+--------------------------+-----------------+-------------+ +---- + +=== `MAX()` with a single expression + +For example, you might want to know what is the highest rating among all stored movies: + +[source,sql] +---- +SELECT MAX(imdbRating) AS "Highest Rating" +FROM movies; +---- + +[source,sql] +---- ++-----------------+ +| Highest Rating | ++-----------------+ +| 9.4 | ++-----------------+ +---- + +=== `MAX()` with GROUP BY clause + +This example uses a `MAX()` function to get the highest rating in each movie category and the results are ordered by the rating in ascending order. + +[source,sql] +---- +SELECT + movieCategory AS "Movie Category", + MAX(imdbRating) AS "Highest Rating" +FROM movies +GROUP BY movieCategory +ORDER BY MAX(imdbRating) ASC; +---- + +This returns the highest rating from a group of `movieCategory`: + +[source,bash] +---- + Movie Category | Highest Rating +-----------------+---------------- + Horror | 8.1 + Romance | 8.4 + Thriller | 8.5 + Science Fiction | 9 + Action | 9.2 + Crime | 9.3 + Drama | 9.4 +(7 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/min.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/min.adoc new file mode 100644 index 000000000..ec0a57bdb --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/min.adoc @@ -0,0 +1,118 @@ += MIN +:description: MIN() is a function that returns the minimum value from a set of records. +:page-topic-type: reference + +`MIN()` is a function that returns the minimum value from a set of records. 
+ +== Syntax + +The syntax for this function is: + +[source,sql] +---- +MIN(column_name) +---- + +This function's output data type will always be the same as the input one, however it returns `NULL` if there are no records or input consists of `NULL` values and it also returns `NaN` if the input contains a `NaN`. + +== Examples + +The following examples use a movies table that stores movie details, such as movie's title, category, and IMDb rating. + +[source,sql] +---- +CREATE TABLE movies ( + movieid int, + moviename text, + moviecategory text, + imdbrating real +); +INSERT INTO movies (movieid, moviename, moviecategory, imdbrating) +VALUES +(8557411, 'The Shawshank Redemption', 'Drama', 9.4), +(8557421, 'Life Is Beautiful', 'Romance', 8.4), +(8557451, 'The Godfather', 'Crime', 9.3), +(8557311, 'Prisoners', 'Thriller', 8.5), +(8557321, 'Inception', 'Science Fiction', 9), +(8557351, 'The Dark Knight', 'Action', 9.2), +(8557221, 'Coco', 'Drama', 8.2), +(8557251, 'The Sixth Sense', 'Horror', 8.1), +(8557231, 'Kill Bill: Vol. 1', 'Action', 8.1), +(8557281, 'The Notebook', 'Romance', 7.8), +(8557291, 'Forrest Gump', 'Drama', 8); +---- + +[source,sql] +---- +SELECT * FROM movies; +---- + +The query returns: + +[source,sql] +---- ++---------+--------------------------+-----------------+-------------+ +| movieid | moviename | moviecategory | imdbrating | ++---------+--------------------------+-----------------+-------------+ +| 8557411 | The Shawshank Redemption | Drama | 9.4 | +| 8557421 | Life Is Beautiful | Romance | 8.4 | +| 8557451 | The Godfather | Crime | 9.3 | +| 8557311 | Prisoners | Thriller | 8.5 | +| 8557321 | Inception | Science Fiction | 9 | +| 8557351 | The Dark Knight | Action | 9.2 | +| 8557221 | Coco | Drama | 8.2 | +| 8557251 | The Sixth Sense | Horror | 8.1 | +| 8557231 | Kill Bill: Vol. 
1 | Action | 8.1 | +| 8557281 | The Notebook | Romance | 7.8 | +| 8557291 | Forrest Gump | Drama | 8 | ++---------+--------------------------+-----------------+-------------+ +---- + +=== `MIN()` with a single expression + +For example, you might want to know what is the lowest rating of all stored movies: + +[source,sql] +---- +SELECT MIN(imdbRating) AS "Lowest Rating" +FROM movies; +---- + +[source,sql] +---- ++----------------+ +| Lowest Rating | ++----------------+ +| 7.8 | ++----------------+ +---- + +=== `MIN()` with `GROUP BY` clause + +This example uses a `GROUP BY` clause to group the movie categories, then uses the `MIN()` function to get the lowest rating in each movie category and arrange the results in ascending order. + +[source,sql] +---- +SELECT + movieCategory AS "Movie Category", + MIN(imdbRating) AS "Lowest Rating" +FROM movies +GROUP BY movieCategory +ORDER BY MIN(imdbRating) ASC; +---- + +The query returns: + +[source,bash] +---- + Movie Category | Lowest Rating +-----------------+--------------- + Romance | 7.8 + Drama | 8 + Horror | 8.1 + Action | 8.1 + Thriller | 8.5 + Science Fiction | 9 + Crime | 9.3 +(7 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/index.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/index.adoc new file mode 100644 index 000000000..9fbdd7853 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/index.adoc @@ -0,0 +1,3 @@ += Ordered-Set Aggregate Functions +:description: Reference for ordered-set aggregate functions in Redpanda SQL. 
+:page-layout: index diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/mode.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/mode.adoc new file mode 100644 index 000000000..963fad42b --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/mode.adoc @@ -0,0 +1,71 @@ += MODE() +:description: MODE() is an ordered-set aggregate function that returns the most frequently occurring value (the mode) from a set of values. +:page-topic-type: reference + +`MODE()` is an ordered-set aggregate function that returns the most frequently occurring value (the mode) from a set of values. + +== Syntax + +[source,sql] +---- +MODE() WITHIN GROUP (ORDER BY order_list) +---- + +[NOTE] +==== +Null values are ignored during the calculation. If `NULL` is the most frequent value, the function returns the second most common value. +==== + +== Parameters + +* `()`: This function does not take any arguments, but the parentheses are required. + +== Examples + +The following example uses a simplified version of the `film` table from the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila database^], containing only the `title`, `length` and `rating` columns. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The following query retrieves the most frequent ratings found in the film table: + +[source,sql] +---- +SELECT MODE() + WITHIN GROUP (ORDER BY rating) +FROM film; +---- + +The query returns: + +[source,sql] +---- +| mode | +|-------| +| NC-17 | +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-cont.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-cont.adoc new file mode 100644 index 000000000..0c48e7641 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-cont.adoc @@ -0,0 +1,98 @@ += PERCENTILE_CONT() +:description: PERCENTILE_CONT() is an ordered-set aggregate function used to compute continuous percentiles from a set of values. +:page-topic-type: reference + +`PERCENTILE_CONT()` is an ordered-set aggregate function used to compute continuous percentiles from a set of values. 
The continuous percentile returns an interpolated value based on the distribution of the input data, while multiple continuous percentiles return an array of results matching the shape of the `fractions` parameter with each non-null element replaced by the value corresponding to that percentile. + +== Syntax + +The syntax for this function is: + +[tabs] +==== +Continuous Percentile:: ++ +[source,sql] +---- +PERCENTILE_CONT(fraction) WITHIN GROUP (ORDER BY order_list) +---- ++ +[NOTE] +==== +This function is often used in conjunction with the `WITHIN GROUP` clause to specify how to order the data before calculating the percentile. +==== ++ +Parameters + +* `fraction`: Decimal value between 0 and 1 representing the desired percentile (for example, 0.25 for the 25th percentile). + +Multiple Continuous Percentile:: ++ +[source,sql] +---- +PERCENTILE_CONT(fractions) WITHIN GROUP (ORDER BY order_list) +---- ++ +[NOTE] +==== +This function is often used in conjunction with the `WITHIN GROUP` clause to specify how to order the data before calculating the percentile. +==== ++ +Parameters + +* `fractions`: Array of decimal values between 0 and 1 representing the desired percentiles (for example, `ARRAY[0.25, 0.50, 0.75, 0.90]`). + +==== + +== Examples + +The following example uses a simplified version of the `film` table from the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila database^], containing only the `title`, `length` and `rating` columns. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +This query calculates the median film length within each rating category. + +[source,sql] +---- +SELECT rating, PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY length) AS "50th percentile" FROM film +GROUP BY rating; +---- + +The query returns: + +[source,sql] +---- + rating | 50th percentile +--------+----------------- + PG-13 | 112.5 + PG | 121 + NC-17 | 150 + G | 77 +(4 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-disc.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-disc.adoc new file mode 100644 index 000000000..23a6f9bfc --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-disc.adoc @@ -0,0 +1,98 @@ += PERCENTILE_DISC() +:description: PERCENTILE_DISC() is an ordered-set aggregate function used to compute discrete percentiles from a set of values. +:page-topic-type: reference + +`PERCENTILE_DISC()` is an ordered-set aggregate function used to compute discrete percentiles from a set of values. 
The discrete percentile returns the first input value whose position in the ordering equals or exceeds the specified fraction, while multiple discrete percentiles return an array of results matching the shape of the `fractions` parameter, with each non-null element being replaced by the input value corresponding to that percentile. + +== Syntax + +The syntax for this function is: + +[tabs] +==== +Discrete Percentile:: ++ +[source,sql] +---- +PERCENTILE_DISC(fraction) WITHIN GROUP (ORDER BY order_list) +---- ++ +[NOTE] +==== +If multiple values share the same rank at the specified percentile, `PERCENTILE_DISC()` returns the first one encountered in the ordering. +==== ++ +Parameters + +* `fraction`: Decimal value between 0 and 1 representing the desired percentile (for example, 0.25 for the 25th percentile). + +Multiple Discrete Percentile:: ++ +[source,sql] +---- +PERCENTILE_DISC(fractions) WITHIN GROUP (ORDER BY order_list) +---- ++ +[NOTE] +==== +If multiple values share the same rank at the specified percentile, `PERCENTILE_DISC()` returns the first one encountered in the ordering. +==== ++ +Parameters + +* `fractions`: Array of decimal values between 0 and 1 representing the desired percentiles (for example, `ARRAY[0.25, 0.50, 0.75, 0.90]`). + +==== + +== Examples + +The following example uses a simplified version of the `film` table from the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila database^], containing only the `title`, `length` and `rating` columns. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The following query calculates the first quartile, median, and third quartile of film lengths within each rating category: + +[source,sql] +---- +SELECT rating, percentile_disc(ARRAY[0.25, 0.5, 0.75]) WITHIN GROUP (ORDER BY length) AS "quartiles" FROM film +GROUP BY rating; +---- + +The query returns: + +[source,sql] +---- + rating | quartiles +--------+--------------- + G | {54,77,125} + PG | {106,121,137} + PG-13 | {47,83,142} + NC-17 | {131,150,176} +(4 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/corr.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/corr.adoc new file mode 100644 index 000000000..e8c589cfd --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/corr.adoc @@ -0,0 +1,70 @@ += CORR() +:description: The CORR() aggregate function calculates the Pearson correlation coefficient between two sets of number pairs. +:page-topic-type: reference + +The `CORR()` aggregate function calculates the Pearson correlation coefficient between two sets of number pairs. This function measures the linear relationship between two variables, providing a value between -1 and 1. 
+ +== Syntax + +The syntax for this function is: + +[source,sql] +---- +CORR(y, x) +---- + +== Parameters + +* `y`: Variable being predicted. +* `x`: Variable used for prediction. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `CORR()` function to calculate the correlation between film length and rating: + +[source,sql] +---- +SELECT + CORR(length, rating) AS CorrelationCoefficient +FROM film; +---- + +The query returns: + +[source,sql] +---- + correlationcoefficient +------------------------ + 0.6190587870867634 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/covar-pop.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/covar-pop.adoc new file mode 100644 index 000000000..1391e0912 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/covar-pop.adoc @@ -0,0 +1,70 @@ += COVAR_POP() +:description: The COVAR_POP() aggregate function calculates the 
population covariance between two sets of number pairs. +:page-topic-type: reference + +The `COVAR_POP()` aggregate function calculates the population covariance between two sets of number pairs. This function measures how much two variables change together, providing insight into their linear relationship. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +COVAR_POP(y, x) +---- + +== Parameters + +* `y`: Variable being predicted. +* `x`: Variable used for prediction. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `COVAR_POP()` function to calculate the covariance between film length and rating: + +[source,sql] +---- +SELECT + COVAR_POP(length, rating) AS Covariance +FROM film; +---- + +The query returns: + +[source,sql] +---- + covariance +------------------- + 36.02768166089963 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/covar-samp.adoc 
b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/covar-samp.adoc new file mode 100644 index 000000000..7857f8149 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/covar-samp.adoc @@ -0,0 +1,71 @@ += COVAR_SAMP() +:description: The COVAR_SAMP() aggregate function calculates the sample covariance between two sets of number pairs. +:page-topic-type: reference + +The `COVAR_SAMP()` aggregate function calculates the sample covariance between two sets of number pairs. This function measures how changes in one variable relate linearly to changes in another variable within a sample dataset. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +COVAR_SAMP(y, x) +---- + +== Parameters + +* `y`: Variable being predicted. +* `x`: Variable used for prediction. + +== Examples + +This example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `COVAR_SAMP()` function to calculate the sample covariance between film `length` and `rating` where `rating` is greater than or equal to 4: + +[source,sql] +---- +SELECT + COVAR_SAMP(length, rating) AS SampleCovariance +FROM film +WHERE rating >= 4; +---- + +The query returns: + +[source,sql] +---- + samplecovariance +-------------------- + 23.087912087912066 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/index.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/index.adoc new file mode 100644 index 000000000..9cd3aa026 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/index.adoc @@ -0,0 +1,27 @@ += Overview +:description: Aggregate functions for statistics are typically used for statistical analysis. + +Aggregate functions for statistics are typically used for statistical analysis. 
Redpanda SQL supports the following functions: + +[width="100%",cols="42%,58%",options="header",] +|=== +|Functions |Description +|xref:reference:sql/sql-functions/aggregate-functions/statistics/corr.adoc[CORR] |Calculates the Pearson correlation coefficient between two sets of number pairs +|xref:reference:sql/sql-functions/aggregate-functions/statistics/covar-pop.adoc[COVAR_POP] |Calculates the population covariance between two sets of number pairs +|xref:reference:sql/sql-functions/aggregate-functions/statistics/covar-samp.adoc[COVAR_SAMP] |Calculates the sample covariance between two sets of number pairs +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-avgx.adoc[REGR_AVGX] |Calculates the average of the independent variable (sum(X)/N) +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-avgy.adoc[REGR_AVGY] |Calculates the average of the dependent variable (sum(Y)/N) +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-count.adoc[REGR_COUNT] |Calculates the number of input rows in which both expressions are non-null +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-intercept.adoc[REGR_INTERCEPT] |Calculates the y-intercept of the univariate linear regression line for a group of data points +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-r2.adoc[REGR_R2] |Calculates the coefficient of determination (R^2^) for a linear regression model +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-slope.adoc[REGR_SLOPE] |Calculates the slope of the least-squares-fit linear equation determined by the (X, Y) pairs +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-sxx.adoc[REGR_SXX] |Calculates the sum(X^2^) - sum(X)^2^/N ("`sum of squares`" of the independent variable) +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-sxy.adoc[REGR_SXY] |Calculates the sum(X*Y) - sum(X) * sum(Y)/N ("`sum of products`" of independent times 
dependent variable) +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-syy.adoc[REGR_SYY] |Calculates the sum(Y^2^) - sum(Y)^2^/N ("`sum of squares`" of the dependent variable) +|xref:reference:sql/sql-functions/aggregate-functions/statistics/stddev.adoc[STDDEV] |Calculates the sample standard deviation of a set of numeric values +|xref:reference:sql/sql-functions/aggregate-functions/statistics/stddev-pop.adoc[STDDEV_POP] |Calculates the population standard deviation of the input values +|xref:reference:sql/sql-functions/aggregate-functions/statistics/stddev-samp.adoc[STDDEV_SAMP] |Calculates the sample standard deviation of the input values +|xref:reference:sql/sql-functions/aggregate-functions/statistics/variance.adoc[VARIANCE] |Calculates the sample variance of a set of numeric values +|xref:reference:sql/sql-functions/aggregate-functions/statistics/var-pop.adoc[VAR_POP] |Calculates the population variance of the input values (square of the population standard deviation) +|xref:reference:sql/sql-functions/aggregate-functions/statistics/var-samp.adoc[VAR_SAMP] |Calculates the sample variance of the input values (square of the sample standard deviation) +|=== diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-avgx.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-avgx.adoc new file mode 100644 index 000000000..e5ae969f7 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-avgx.adoc @@ -0,0 +1,70 @@ += REGR_AVGX() +:description: The REGR_AVGX() aggregate function calculates the average of the independent variable (x) for non-null pairs of dependent (y) and independent (x) variables. +:page-topic-type: reference + +The `REGR_AVGX()` aggregate function calculates the average of the independent variable (x) for non-null pairs of dependent (y) and independent (x) variables. 
This function is commonly used in linear regression analysis to compute the mean of the independent variable where both variables are not NULL. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +REGR_AVGX(y, x) +---- + +== Parameters + +* `y`: Variable being predicted. +* `x`: Variable used for prediction. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `REGR_AVGX()` function to calculate the average rating for films where both `length` and `rating` are not NULL: + +[source,sql] +---- +SELECT + REGR_AVGX(length, rating) AS AverageRating +FROM film; +---- + +The query returns: + +[source,sql] +---- + averagerating +------------------- + 5.294117647058823 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-avgy.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-avgy.adoc new file mode 100644 index 000000000..979b523b5 --- /dev/null +++ 
b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-avgy.adoc @@ -0,0 +1,70 @@ += REGR_AVGY() +:description: The REGR_AVGY() aggregate function calculates the mean of the dependent variable (y) for non-null pairs of dependent (y) and independent (x) variables +:page-topic-type: reference + +The `REGR_AVGY()` aggregate function calculates the mean of the dependent variable (y) for non-null pairs of dependent (y) and independent (x) variables. This function is used in linear regression analysis to compute the average value of the dependent variable where both variables are not NULL. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +REGR_AVGY(y, x) +---- + +== Parameters + +* `y`: Variable being predicted. +* `x`: Variable used for prediction. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `REGR_AVGY()` function to calculate the mean of the dependent variable (`rating`) for rows where both `rating` and `length` are not NULL: + +[source,sql] +---- +SELECT + REGR_AVGY(rating, length) AS AverageRating +FROM film; +---- + +The query returns: + +[source,sql] +---- + averagerating +------------------- + 5.294117647058823 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-count.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-count.adoc new file mode 100644 index 000000000..bc3141da4 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-count.adoc @@ -0,0 +1,70 @@ += REGR_COUNT() +:description: The REGR_COUNT() aggregate function calculates the number of non-null value pairs for a dependent variable (y) and an independent variable (x). +:page-topic-type: reference + +The `REGR_COUNT()` aggregate function calculates the number of non-null value pairs for a dependent variable (y) and an independent variable (x). This function is used in linear regression analysis to determine the number of valid data points available for computation. 
+ +== Syntax + +The syntax for this function is: + +[source,sql] +---- +REGR_COUNT(y, x) +---- + +== Parameters + +* `y`: Variable being predicted. +* `x`: Variable used for prediction. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `REGR_COUNT()` function to count the number of rows where both `rating` and `length` are not NULL: + +[source,sql] +---- +SELECT + REGR_COUNT(rating, length) AS NonNullPairsCount +FROM film; +---- + +The query returns: + +[source,sql] +---- + nonnullpairscount +------------------- + 17 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-intercept.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-intercept.adoc new file mode 100644 index 000000000..4aa70b705 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-intercept.adoc @@ -0,0 +1,70 @@ += REGR_INTERCEPT() +:description: The REGR_INTERCEPT() aggregate 
function calculates the y-intercept of the univariate linear regression line for a group of data points, where the dependent variable is (y) and the independent variable is (x). +:page-topic-type: reference + +The `REGR_INTERCEPT()` aggregate function calculates the y-intercept of the univariate linear regression line for a group of data points, where the dependent variable is (y) and the independent variable is (x). The intercept is the point where the regression line crosses the y-axis when x=0. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +REGR_INTERCEPT(y, x) +---- + +== Parameters + +* `y`: Variable being predicted. +* `x`: Variable used for prediction. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `REGR_INTERCEPT()` function to calculate the y-intercept of the regression line for valid pairs of `rating` and `length`: + +[source,sql] +---- +SELECT + REGR_INTERCEPT(rating, length) AS YIntercept +FROM film; +---- + +The query returns: + +[source,sql] +---- + yintercept +-------------------- + 
2.1055200882495355 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-r2.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-r2.adoc new file mode 100644 index 000000000..20ca8bf02 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-r2.adoc @@ -0,0 +1,70 @@ += REGR_R2() +:description: The REGR_R2() aggregate function calculates the coefficient of determination (R2) for a linear regression model. +:page-topic-type: reference + +The `REGR_R2()` aggregate function calculates the coefficient of determination (R2) for a linear regression model. The R2 value indicates how well the independent variable (x) explains the variability of the dependent variable (y). + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +REGR_R2(y, x) +---- + +== Parameters + +* `y`: Variable being predicted. +* `x`: Variable used for prediction. + +== Examples + +This example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `REGR_R2()` function to calculate the coefficient of determination (R2) for valid pairs of `rating` and `length`: + +[source,sql] +---- +SELECT + REGR_R2(rating, length) AS coefficientOfDetermination +FROM film; +---- + +The query returns: + +[source,sql] +---- + coefficientofdetermination +---------------------------- + 0.3832337818693347 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-slope.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-slope.adoc new file mode 100644 index 000000000..54df6656a --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-slope.adoc @@ -0,0 +1,70 @@ += REGR_SLOPE() +:description: The REGR_SLOPE() aggregate function calculates the slope of the regression line for a linear relationship between a dependent variable (y) and an independent variable (x). +:page-topic-type: reference + +The `REGR_SLOPE()` aggregate function calculates the slope of the regression line for a linear relationship between a dependent variable (y) and an independent variable (x). The slope represents the rate of change in `y` for every unit increase in `x`. 
This function is used in regression analysis to quantify the strength and direction of a linear relationship. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +REGR_SLOPE(y, x) +---- + +== Parameters + +* `y`: Variable being predicted. +* `x`: Variable used for prediction. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `REGR_SLOPE()` function to calculate the slope of the regression line for valid pairs of `rating` and `length`: + +[source,sql] +---- +SELECT + REGR_SLOPE(rating, length) AS Slope +FROM film; +---- + +The query returns: + +[source,sql] +---- + slope +---------------------- + 0.025985694391063227 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-sxx.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-sxx.adoc new file mode 100644 index 000000000..3df266edb --- /dev/null +++ 
b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-sxx.adoc @@ -0,0 +1,70 @@ += REGR_SXX() +:description: The REGR_SXX() aggregate function calculates the sum of squares of deviations for the independent variable (x) in a linear regression analysis. +:page-topic-type: reference + +The `REGR_SXX()` aggregate function calculates the sum of squares of deviations for the independent variable (x) in a linear regression analysis. This value represents the variability of the independent variable and is a key component in calculating the slope and other regression statistics. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +REGR_SXX(y, x) +---- + +== Parameters + +* `y`: Variable being predicted. +* `x`: Variable used for prediction. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `REGR_SXX()` function to calculate the sum of squares of deviations for the independent variable `length`: + +[source,sql] +---- +SELECT + REGR_SXX(rating, length) AS SumOfSquaresX +FROM film; +---- + +The query returns: + +[source,sql] +---- + sumofsquaresx +------------------ + 23569.5294117647 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-sxy.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-sxy.adoc new file mode 100644 index 000000000..45cac9d69 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-sxy.adoc @@ -0,0 +1,70 @@ += REGR_SXY() +:description: The REGR_SXY() aggregate function calculates the sum of products of deviations for the dependent variable (y) and the independent variable (x) in a linear regression analysis. +:page-topic-type: reference + +The `REGR_SXY()` aggregate function calculates the sum of products of deviations for the dependent variable (y) and the independent variable (x) in a linear regression analysis. This value represents the covariance-like term used to compute the slope of the regression line. 
+ +== Syntax + +The syntax for this function is: + +[source,sql] +---- +REGR_SXY(y, x) +---- + +== Parameters + +* `y`: Variable being predicted. +* `x`: Variable used for prediction. + +== Examples + +This example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +This query uses the `REGR_SXY()` function to calculate the sum of products of deviations for non-null pairs of `rating` and `length`: + +[source,sql] +---- +SELECT + REGR_SXY(rating, length) AS SumOfSquaresXY +FROM film; +---- + +The query returns: + +[source,sql] +---- + sumofsquaresxy +------------------- + 612.4705882352937 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-syy.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-syy.adoc new file mode 100644 index 000000000..002e90046 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-syy.adoc @@ -0,0 +1,70 @@ += REGR_SYY() +:description: The REGR_SYY() aggregate function calculates the sum of 
squares of deviations for the dependent variable (y) in a linear regression analysis. +:page-topic-type: reference + +The `REGR_SYY()` aggregate function calculates the sum of squares of deviations for the dependent variable (y) in a linear regression analysis. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +REGR_SYY(y, x) +---- + +== Parameters + +* `y`: Variable being predicted. +* `x`: Variable used for prediction. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `REGR_SYY()` function to calculate the sum of squares of deviation for the dependent variable `rating`: + +[source,sql] +---- +SELECT + REGR_SYY(rating, length) AS SumOfSquaresY +FROM film; +---- + +The query returns: + +[source,sql] +---- + sumofsquaresy +-------------------- + 41.529411764705856 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev-pop.adoc 
b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev-pop.adoc new file mode 100644 index 000000000..abf8a982e --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev-pop.adoc @@ -0,0 +1,69 @@ += STDDEV_POP() +:description: The STDDEV_POP() aggregate function calculates the population standard deviation of a set of numeric values. +:page-topic-type: reference + +The `STDDEV_POP()` aggregate function calculates the population standard deviation of a set of numeric values. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +STDDEV_POP(expression) +---- + +== Parameters + +* `expression`: Numeric expression or column. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `STDDEV_POP()` function to calculate the population standard deviation for the `length` column: + +[source,sql] +---- +SELECT + STDDEV_POP(length) AS LengthPopStdDev +FROM film; +---- + +The query returns: + +[source,sql] +---- + lengthpopstddev +------------------- + 37.23496886764368 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev-samp.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev-samp.adoc new file mode 100644 index 000000000..c50947cf0 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev-samp.adoc @@ -0,0 +1,70 @@ += STDDEV_SAMP() +:description: The STDDEV_SAMP() aggregate function calculates the sample standard deviation of a set of numeric values. +:page-topic-type: reference + +The `STDDEV_SAMP()` aggregate function calculates the sample standard deviation of a set of numeric values. This function measures how much the values deviate from their mean, assuming the data is a sample of a larger population. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +STDDEV_SAMP(expression) +---- + +== Parameters + +* `expression`: Numeric expression or column. 
+ +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `STDDEV_SAMP()` function to calculate the sample standard deviation for the `length` column where `rating` is greater than or equal to 4: + +[source,sql] +---- +SELECT + STDDEV_SAMP(length) AS LengthSampleStdDev +FROM film +WHERE rating >= 4; +---- + +The query returns: + +[source,sql] +---- + lengthsamplestddev +-------------------- + 34.92503746251735 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev.adoc new file mode 100644 index 000000000..c261397bd --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev.adoc @@ -0,0 +1,69 @@ += STDDEV() +:description: The STDDEV() aggregate function calculates the sample standard deviation of a set of numeric values. 
+:page-topic-type: reference + +The `STDDEV()` aggregate function calculates the sample standard deviation of a set of numeric values. Standard deviation measures the dispersion or spread of data points around the mean. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +STDDEV(expression) +---- + +== Parameters + +* `expression`: Numeric expression or column. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `STDDEV()` function to calculate the sample standard deviation for the `length` column: + +[source,sql] +---- +SELECT + STDDEV(length) AS LengthStdDev +FROM film; +---- + +The query returns: + +[source,sql] +---- + lengthstddev +------------------- + 38.38092740197003 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/var-pop.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/var-pop.adoc new file mode 100644 index 000000000..a3be4550d --- /dev/null +++ 
b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/var-pop.adoc @@ -0,0 +1,69 @@ += VAR_POP() +:description: The VAR_POP() aggregate function calculates the population variance of a set of numeric values. +:page-topic-type: reference + +The `VAR_POP()` aggregate function calculates the population variance of a set of numeric values. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +VAR_POP(expression) +---- + +== Parameters + +* `expression`: Numeric expression or column. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `VAR_POP()` function to calculate the population variance for the `length` column: + +[source,sql] +---- +SELECT + VAR_POP(length) AS LengthPopulationVariance +FROM film; +---- + +The query returns: + +[source,sql] +---- + lengthpopulationvariance +-------------------------- + 1386.442906574394 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/var-samp.adoc 
b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/var-samp.adoc new file mode 100644 index 000000000..d3992438c --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/var-samp.adoc @@ -0,0 +1,70 @@ += VAR_SAMP() +:description: The VAR_SAMP() aggregate function calculates the sample variance of a set of numeric values. +:page-topic-type: reference + +The `VAR_SAMP()` aggregate function calculates the sample variance of a set of numeric values. This function measures the spread of data points around the mean, assuming the data is a sample of a larger population. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +VAR_SAMP(expression) +---- + +== Parameters + +* `expression`: Numeric expression or column. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `VAR_SAMP()` function to calculate the sample variance for the `length` column where `rating` is greater than or equal to 4: + +[source,sql] +---- +SELECT + VAR_SAMP(length) AS LengthSampleVariance +FROM film +WHERE rating >= 4; +---- + +The query returns: + +[source,sql] +---- + lengthsamplevariance +---------------------- + 1219.7582417582407 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/variance.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/variance.adoc new file mode 100644 index 000000000..c5616d5ee --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/variance.adoc @@ -0,0 +1,69 @@ += VARIANCE() +:description: The VARIANCE() aggregate function calculates the sample variance of a set of numeric values. +:page-topic-type: reference + +The `VARIANCE()` aggregate function calculates the sample variance of a set of numeric values. Variance measures the spread of data points around the mean, providing insight into how much the values deviate from the average. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +VARIANCE(expression) +---- + +== Parameters + +* `expression`: Numeric expression or column. 
+ +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The following query uses the `VARIANCE()` function to calculate the variance for the `length` column: + +[source,sql] +---- +SELECT + VARIANCE(length) AS LengthVariance +FROM film; +---- + +The query returns: + +[source,sql] +---- + lengthvariance +-------------------- + 1473.0955882352937 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/sum.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/sum.adoc new file mode 100644 index 000000000..5d6bd8156 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/sum.adoc @@ -0,0 +1,253 @@ += SUM +:description: SUM() calculates the sum of values from stored records. +:page-topic-type: reference + +`SUM()` calculates the sum of values from stored records. `SUM()` doesn't consider `NULL` in the calculation, and it returns `NULL` instead of zero if the executed statement returns no rows. 
+ +The supported input and return types are listed in the following table. + +[cols=",",options="header",] +|=== +|Input type |Return type +|INT |LONG +|LONG |LONG +|FLOAT |DOUBLE +|DOUBLE |DOUBLE +|INTERVAL |INTERVAL +|=== + +[NOTE] +==== +If the input type is 32-bit, then the result is 64-bit. +==== + +== Examples + +The following two sample tables are used in these examples: + +*customer table* + +[source,sql] +---- +CREATE TABLE customer ( + customer_id int, + cust_name text +); +INSERT INTO customer + (customer_id, cust_name) +VALUES + (11112, 'Alex'), + (11113, 'Aaron'), + (11114, 'Alice'), + (11115, 'Nina'), + (11116, 'Rosy'), + (11117, 'Martha'), + (11118, 'John'); +---- + +[source,sql] +---- +SELECT * FROM customer; +---- + +This creates the following table: + +[source,sql] +---- ++-------------+-----------+ +| customer_id | cust_name | ++-------------+-----------+ +| 11112 | Alex | +| 11113 | Aaron | +| 11114 | Alice | +| 11115 | Nina | +| 11116 | Rosy | +| 11117 | Martha | +| 11118 | John | ++-------------+-----------+ +---- + +*rental table* + +[source,sql] +---- +CREATE TABLE rental ( + rental_id int, + rental_date timestamp, + return_date timestamp, + car text, + customer_id int, + total_price int +); +INSERT INTO rental (rental_id, rental_date, return_date, car, customer_id, total_price) +VALUES +(8557411, '2022-04-02 09:10:19', '2022-04-10 10:15:05', 'Audi', 11112, 1400), +(8557421, '2022-04-06 07:00:30', '2022-04-19 07:10:19', 'BMW', 11115, 2000), +(8557451, '2022-04-19 08:00:20', '2022-04-24 08:05:00', 'Cadillac', 11112, 1000), +(8557311, '2022-05-11 09:15:28', '2022-05-18 09:00:18', 'Audi', 11115, 1500), +(8557321, '2022-05-20 10:12:22', '2022-05-28 10:08:48', 'Audi', 11113, 1500), +(8557351, '2022-06-10 12:18:09', '2022-06-20 18:12:23', 'Cadillac', 11114, 1200), +(8557221, '2022-06-17 14:02:02', '2022-06-20 14:17:02', 'Chevrolet', 11112, 1300), +(8557251, '2022-07-12 05:19:49', '2022-07-19 07:15:28', 'Chevrolet', 11116, 1400), +(8557231, 
 '2022-08-09 09:29:08', '2022-08-24 09:30:58', 'Cadillac', 11114, 2000), +(8557291, '2022-08-18 15:15:20', '2022-09-01 15:30:19', 'BMW', 11117, 3000); +---- + +[source,sql] +---- +SELECT * FROM rental; +---- + +The `rental` table stores the details of each car rental: + +[source,sql] +---- ++------------+---------------------+---------------------+-----------+---------------+-------------+ +| rental_id | rental_date | return_date | car | customer_id | total_price | ++------------+---------------------+---------------------+-----------+---------------+-------------+ +| 8557411 | 2022-04-02 09:10:19 | 2022-04-10 10:15:05 | Audi | 11112 | 1400 | +| 8557421 | 2022-04-06 07:00:30 | 2022-04-19 07:10:19 | BMW | 11115 | 2000 | +| 8557451 | 2022-04-19 08:00:20 | 2022-04-24 08:05:00 | Cadillac | 11112 | 1000 | +| 8557311 | 2022-05-11 09:15:28 | 2022-05-18 09:00:18 | Audi | 11115 | 1500 | +| 8557321 | 2022-05-20 10:12:22 | 2022-05-28 10:08:48 | Audi | 11113 | 1500 | +| 8557351 | 2022-06-10 12:18:09 | 2022-06-20 18:12:23 | Cadillac | 11114 | 1200 | +| 8557221 | 2022-06-17 14:02:02 | 2022-06-20 14:17:02 | Chevrolet | 11112 | 1300 | +| 8557251 | 2022-07-12 05:19:49 | 2022-07-19 07:15:28 | Chevrolet | 11116 | 1400 | +| 8557231 | 2022-08-09 09:29:08 | 2022-08-24 09:30:58 | Cadillac | 11114 | 2000 | +| 8557291 | 2022-08-18 15:15:20 | 2022-09-01 15:30:19 | BMW | 11117 | 3000 | ++------------+---------------------+---------------------+-----------+---------------+-------------+ +---- + +=== `SUM()` in `SELECT` statement + +The following example uses the `SUM()` function to calculate the total rental price across all rentals: + +[source,sql] +---- +SELECT SUM (total_price) AS total +FROM rental; +---- + +This returns the sum of the `total_price` column: + +[source,sql] +---- ++--------+ +| total | ++--------+ +| 16300 | ++--------+ +---- + +=== `SUM()` with a `NULL` result + +The following example uses the `SUM()` function to calculate the total rental price for `customer_id = 11118`: + 
+[source,sql] +---- +SELECT SUM (total_price) AS total +FROM rental +WHERE customer_id = 11118; +---- + +Since no records in the `rental` table have `customer_id = 11118`, the `SUM()` function returns `NULL`. + +[source,sql] +---- ++--------+ +| total | ++--------+ +| null | ++--------+ +---- + +=== `SUM()` with `GROUP BY` clause + +You can use the `GROUP BY` clause to group the records in the table and apply the `SUM()` function to each group afterward. + +The following example uses the `SUM()` function and the `GROUP BY` clause to calculate the total price paid by each customer: + +[source,sql] +---- +SELECT customer_id, +SUM (total_price) AS total_spend +FROM rental +GROUP BY customer_id; +---- + +This calculates the sum of `total_price` for each `customer_id`: + +[source,sql] +---- ++--------------+--------------+ +| customer_id | total_spend | ++--------------+--------------+ +| 11115 | 3500 | +| 11117 | 3000 | +| 11116 | 1400 | +| 11113 | 1500 | +| 11112 | 3700 | +| 11114 | 3200 | ++--------------+--------------+ +---- + +=== `SUM()` with `HAVING` clause + +You can use the `SUM()` function with the `HAVING` clause to keep only the groups whose sum meets a specific condition: + +[source,sql] +---- +SELECT + customer_id, + SUM (total_price) AS total_spend +FROM rental +GROUP BY customer_id +HAVING SUM(total_price) >= 3000; +---- + +This returns the customers who spent 3000 or more: + +[source,sql] +---- ++--------------+--------------+ +| customer_id | total_spend | ++--------------+--------------+ +| 11115 | 3500 | +| 11117 | 3000 | +| 11112 | 3700 | +| 11114 | 3200 | ++--------------+--------------+ +---- + +=== `SUM()` with multiple expressions + +The example uses the following: + +* `SUM()` function to calculate total rental days. +* `JOIN` clause to combine the rental table with the customer table. +* `GROUP BY` clause to group the result set by the customers' names. 
+ +[source,sql] +---- +SELECT s.cust_name, SUM(return_date - rental_date ) AS rental_period +FROM rental AS r +JOIN customer AS s +ON r.customer_id = s.customer_id +GROUP BY cust_name; +---- + +The output displays the customers' names with their total rental period. + +[source,sql] +---- ++------------+-------------------+ +| cust_name | rental_period | ++------------+-------------------+ +| Aaron | 7 days 23:56:26 | +| Martha | 14 days 00:14:59 | +| Rosy | 7 days 01:55:39 | +| Nina | 19 days 23:54:39 | +| Alex | 16 days 01:24:26 | +| Alice | 25 days 05:56:04 | ++------------+-------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/boolean-functions/if-function.adoc b/modules/reference/pages/sql/sql-functions/boolean-functions/if-function.adoc new file mode 100644 index 000000000..b6243e4db --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/boolean-functions/if-function.adoc @@ -0,0 +1,144 @@ += IF() +:description: The IF() function returns one value if the condition is TRUE and another value if the condition is FALSE. +:page-topic-type: reference + +The `IF()` function returns one value if the condition is `TRUE` and another value if the condition is `FALSE`. + +== Syntax + +[source,sql] +---- +IF(expression, true_result, else_result) +---- + +The `expression` must evaluate to a Boolean (`TRUE` or `FALSE`). 
+ +== Examples + +=== IF() with a table + +The following example uses a `test_result` table to determine which participants passed and failed: + +[source,sql] +---- +CREATE TABLE test_result ( + applicant_id int, + name text, + score int +); + +INSERT INTO test_result VALUES +(78765,'Mike Aoki',677), +(78786,'Julie Grahams',650), +(78986,'Alexandra Jones',450), +(79742,'Lucas Moore',487), +(79769,'Augustine Harkness',572); +---- + +View the table: + +[source,sql] +---- +SELECT * FROM test_result; +---- + +The query returns: + +[source,sql] +---- ++---------------+--------------------+--------+ +| applicant_id | name | score | ++---------------+--------------------+--------+ +| 78765 | Mike Aoki | 677 | +| 78786 | Julie Grahams | 650 | +| 78986 | Alexandra Jones | 450 | +| 79742 | Lucas Moore | 487 | +| 79769 | Augustine Harkness | 572 | ++---------------+--------------------+--------+ +---- + +. The following query returns `'PASSED'` if the score is 500 or greater, and `'NOT PASSED'` otherwise: ++ +[source,sql] +---- +SELECT name, IF(score>=500, 'PASSED', 'NOT PASSED') FROM test_result; +---- + +. 
The query returns: ++ +[source,sql] +---- ++--------------------+-------------+ +| name | case | ++--------------------+-------------+ +| Mike Aoki | PASSED | +| Julie Grahams | PASSED | +| Alexandra Jones | NOT PASSED | +| Lucas Moore | NOT PASSED | +| Augustine Harkness | PASSED | ++--------------------+-------------+ +---- + +=== IF() with expressions as return value + +The following example uses a `deptcost` table to determine which departments exceeded the budget: + +[source,sql] +---- +CREATE TABLE deptcost ( + dept text, + budget int, + actual int, + status text +); +INSERT INTO deptcost VALUES +('Finance', 800,677,'within budget'), +('HR', 700,930,'over budget'), +('Marketing', 500,677,'over budget'), +('Project', 720,700,'within budget'), +('Sales', 910,860,'within budget'); +---- + +View the table: + +[source,sql] +---- +SELECT * FROM deptcost; +---- + +The query returns: + +[source,sql] +---- ++-----------+--------+--------+---------------+ +| dept | budget | actual | status | ++-----------+--------+--------+---------------+ +| Finance | 800 | 677 | within budget | +| HR | 700 | 930 | over budget | +| Marketing | 500 | 677 | over budget | +| Project | 720 | 700 | within budget | +| Sales | 910 | 860 | within budget | ++-----------+--------+--------+---------------+ +---- + +. The following query returns the budget difference if `actual` is less than `budget`, and `0` otherwise: ++ +[source,sql] +---- +SELECT dept, IF(actual < budget, budget - actual, 0) FROM deptcost; +---- + +. 
 The query returns: ++ +[source,sql] +---- ++-----------+-----+ +| dept | f | ++-----------+-----+ +| Finance | 123 | +| HR | 0 | +| Marketing | 0 | +| Project | 20 | +| Sales | 50 | ++-----------+-----+ +---- diff --git a/modules/reference/pages/sql/sql-functions/boolean-functions/index.adoc b/modules/reference/pages/sql/sql-functions/boolean-functions/index.adoc new file mode 100644 index 000000000..2f26c20eb --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/boolean-functions/index.adoc @@ -0,0 +1,3 @@ += Boolean Functions +:description: Reference for boolean functions and operators in Redpanda SQL. +:page-layout: index diff --git a/modules/reference/pages/sql/sql-functions/boolean-functions/is-distinct-from-operator.adoc b/modules/reference/pages/sql/sql-functions/boolean-functions/is-distinct-from-operator.adoc new file mode 100644 index 000000000..3253f0ab0 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/boolean-functions/is-distinct-from-operator.adoc @@ -0,0 +1,149 @@ += IS DISTINCT FROM Operator +:description: The IS DISTINCT FROM operator compares two values, treating NULL as a comparable value so that two NULLs are not considered distinct. +:page-topic-type: reference + +The `IS DISTINCT FROM` operator compares two values, treating `NULL` as a comparable value rather than as an unknown. It returns `TRUE` if the two values are different (including when exactly one of them is `NULL`) and `FALSE` if they are the same, including the case where both values are `NULL`. + +== Syntax + +The syntax for this operator is: + +[source,sql] +---- +value1 IS DISTINCT FROM value2 +---- + +Where: + +* `value1` is the first value for comparison. +* `value2` is the second value for comparison. 
+ +== Examples + +=== Basic usage + +Consider this example, which compares two values: + +*Example 1* + +[source,sql] +---- +SELECT NULL IS DISTINCT FROM NULL AS "Result"; +---- + +The preceding query returns the output: + +[source,sql] +---- + Result +-------- + f +---- + +*Example 2* + +[source,sql] +---- +SELECT 10 IS DISTINCT FROM 20 AS "Result"; +---- + +The preceding query returns the output: + +[source,sql] +---- + Result +-------- + t +---- + +*Example 3* + +[source,sql] +---- +SELECT 10 IS DISTINCT FROM 10 AS "Result"; +---- + +The preceding query returns the output: + +[source,sql] +---- + Result +-------- + f +---- + +=== Compare NULL values + +In this example, `NULL` values are compared using the `IS DISTINCT FROM` operator: + +*Example 1* + +[source,sql] +---- +SELECT NULL IS DISTINCT FROM 10 AS "Result"; +---- + +The preceding query returns the output: + +[source,sql] +---- + Result +-------- + t +---- + +*Example 2* + +[source,sql] +---- +SELECT 10 IS DISTINCT FROM NULL AS "Result"; +---- + +The preceding query returns the output: + +[source,sql] +---- + Result +-------- + t +---- + +=== Track inventory variations + +Suppose there is a table named `inventory_changes` that tracks changes in the quantities of products in a warehouse. The table has the structure: + +[source,sql] +---- +CREATE TABLE inventory_changes ( + product_id INT, + change_date DATE, + change_quantity INT +); + +INSERT INTO inventory_changes VALUES +(101, '2023-08-01', 50), +(102, '2023-08-01', 0), +(101, '2023-08-02', -15), +(103, '2023-08-03', 30), +(102, '2023-08-04', 0); +---- + +To retrieve records where the change quantity is distinct from zero, use the `IS DISTINCT FROM` operator. 
+ +[source,sql] +---- +SELECT * +FROM inventory_changes +WHERE change_quantity IS DISTINCT FROM 0; +---- + +The result of the query does not include the 0 values: + +[source,sql] +---- + product_id | change_date | change_quantity +------------+-------------+----------------- + 101 | 2023-08-01 | 50 + 101 | 2023-08-02 | -15 + 103 | 2023-08-03 | 30 +---- diff --git a/modules/reference/pages/sql/sql-functions/boolean-functions/is-not-distinct-from-operator.adoc b/modules/reference/pages/sql/sql-functions/boolean-functions/is-not-distinct-from-operator.adoc new file mode 100644 index 000000000..296a2d8a9 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/boolean-functions/is-not-distinct-from-operator.adoc @@ -0,0 +1,151 @@ += IS NOT DISTINCT FROM Operator +:description: The IS NOT DISTINCT FROM operator is a counterpart to IS DISTINCT FROM. +:page-topic-type: reference + +== Overview + +The `IS NOT DISTINCT FROM` operator is a counterpart to `IS DISTINCT FROM`. + +It compares two values, treating them as equal even when they are both `NULL`. This operator returns `TRUE` if the two values are the same, including the case where both values are `NULL`, and `FALSE` if they are different. + +== Syntax + +The syntax for this operator is: + +[source,sql] +---- +value1 IS NOT DISTINCT FROM value2 +---- + +Where: + +* `value1` is the first value for comparison. +* `value2` is the second value for comparison. 
+ +== Examples + +=== Basic usage + +Consider this example, which compares two values: + +*Example 1* + +[source,sql] +---- +SELECT 45 IS NOT DISTINCT FROM 45 AS "Result"; +---- + +The preceding query returns the output: + +[source,sql] +---- + Result +-------- + t +---- + +*Example 2* + +[source,sql] +---- +SELECT 60 IS NOT DISTINCT FROM 30 AS "Result"; +---- + +The preceding query returns: + +[source,sql] +---- + Result +-------- + f +---- + +*Example 3* + +[source,sql] +---- +SELECT NULL IS NOT DISTINCT FROM NULL AS "Result"; +---- + +The preceding query returns: + +[source,sql] +---- + Result +-------- + t +---- + +=== Compare NULL values + +In this example, `NULL` values are compared using the `IS NOT DISTINCT FROM` operator: + +*Example 1* + +[source,sql] +---- +SELECT NULL IS NOT DISTINCT FROM 80 AS "Result"; +---- + +The preceding query returns: + +[source,sql] +---- + Result +-------- + f +---- + +*Example 2* + +[source,sql] +---- +SELECT 5 IS NOT DISTINCT FROM NULL AS "Result"; +---- + +The preceding query returns: + +[source,sql] +---- + Result +-------- + f +---- + +=== Analyze data completeness + +Suppose there is a table named `customer_contacts` that stores customer contact information. + +[source,sql] +---- +CREATE TABLE customer_contacts ( + customer_id INT, + email TEXT, + phone TEXT +); + +INSERT INTO customer_contacts VALUES +(101, 'john@example.com', NULL), +(102, NULL, '+1234567890'), +(103, 'jane@example.com', '+9876543210'), +(104, NULL, NULL), +(105, 'alex@example.com', '+5555555555'); +---- + +The objective is to retrieve records from this table where neither an email address nor a phone number is available for contacting the customer. + +[source,sql] +---- +SELECT * +FROM customer_contacts +WHERE email IS NOT DISTINCT FROM phone; +---- + +This query retrieves all rows from the `customer_contacts` table where `email` and `phone` are not distinct from each other, which in this data set happens only when both are `NULL`. The result shows that the customer with `customer_id` 104 has no phone number or email address. 
+ +[source,sql] +---- + customer_id | email | phone +-------------+-------+------- + 104 | | +---- diff --git a/modules/reference/pages/sql/sql-functions/index.adoc b/modules/reference/pages/sql/sql-functions/index.adoc new file mode 100644 index 000000000..5a75d5cd2 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/index.adoc @@ -0,0 +1,18 @@ += SQL Functions +:description: Redpanda SQL supports the following function types for querying and transforming data. + +Redpanda SQL supports the following function types for querying and transforming data: + + +[width="100%",cols="<42%,<58%",options="header",] +|=== +|Function Type |Description +|xref:reference:sql/sql-functions/boolean-functions/if-function.adoc[BOOLEAN FUNCTIONS] |Evaluate logical conditions and return `TRUE`, `FALSE`, or `NULL` +|xref:reference:sql/sql-functions/math-functions/index.adoc[MATH, TRIGONOMETRIC, AND HYPERBOLIC FUNCTIONS] |Perform mathematical operations on numeric data, including rounding, exponentiation, and trigonometric calculations +|xref:reference:sql/sql-functions/string-functions/index.adoc[STRING FUNCTIONS] |Manipulate string data for text processing, including concatenation, substring extraction, and case conversion +|xref:reference:sql/sql-functions/timestamp-functions/index.adoc[TIMESTAMP FUNCTIONS] |Handle date and time values, including extracting components, adding intervals, and comparing timestamps +|xref:reference:sql/sql-functions/json-functions/index.adoc[JSON FUNCTIONS] |Manipulate and query JSON data stored in the database, including extracting values and creating JSON objects +|xref:reference:sql/sql-functions/aggregate-functions/index.adoc[AGGREGATE FUNCTIONS] |Summarize a set of values and return a single result, such as calculating sums, averages, and counts +|xref:reference:sql/sql-functions/window-functions/index.adoc[WINDOW FUNCTIONS] |Operate over a subset of rows defined by a windowing clause, enabling ranking, aggregation and row numbering within result
sets +|xref:reference:sql/sql-functions/other-functions/index.adoc[OTHER FUNCTIONS] |Includes a variety of specialized functions not categorized elsewhere +|=== diff --git a/modules/reference/pages/sql/sql-functions/json-functions/index.adoc b/modules/reference/pages/sql/sql-functions/json-functions/index.adoc new file mode 100644 index 000000000..2967a5903 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/json-functions/index.adoc @@ -0,0 +1,24 @@ += Overview +:description: Redpanda SQL provides functions to query and manipulate JSON data. + +Redpanda SQL provides functions to query and manipulate JSON data: + +[width="100%",cols="53%,47%",options="header",] +|=== +|*Functions* |*Description* +|xref:reference:sql/sql-functions/json-functions/json-extract-path.adoc[JSON_EXTRACT_PATH()] |Extracts a JSON sub-object at the specified path. +|xref:reference:sql/sql-functions/json-functions/json-extract-path-text.adoc[JSON_EXTRACT_PATH_TEXT()] |Returns text referenced by a series of path elements in a JSON string or JSON body. +|xref:reference:sql/sql-functions/json-functions/json-array-length.adoc[JSON_ARRAY_LENGTH()] |Returns the number of elements in the outer array of a JSON string or JSON body. +|xref:reference:sql/sql-functions/json-functions/json-array-extract.adoc[JSON_ARRAY_EXTRACT()] |Returns the JSON array as a set of JSON values. +|=== + +Use operators to specify conditions when using JSON functions. Redpanda SQL also supports the following JSON operators: + +[width="100%",cols="12%,46%,42%",options="header",] +|=== +|*Operators* |*Description* |*Example* +|-> |Gets and returns the element of the JSON array. |`'[{"a":"cab"},{"b":"bac"},{"c":"abc"}]'::json -> 2` +|-> |Gets and returns the JSON object field. |`'{"a": {"b":"abc"}}'::json -> 'a'` +|->> |Gets and returns the element of the JSON array as text. |`'[11,22,33]'::json ->> 2` +|->> |Gets and returns the JSON object field as text. 
|`'{"a":13,"b":33}'::json ->> 'b'` +|=== diff --git a/modules/reference/pages/sql/sql-functions/json-functions/json-array-extract.adoc b/modules/reference/pages/sql/sql-functions/json-functions/json-array-extract.adoc new file mode 100644 index 000000000..b0a8bef5e --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/json-functions/json-array-extract.adoc @@ -0,0 +1,84 @@ += JSON_ARRAY_EXTRACT +:description: The JSON_ARRAY_EXTRACT() function returns the JSON array as a set of JSON values. +:page-topic-type: reference + +== Overview + +The `JSON_ARRAY_EXTRACT()` function returns the JSON array as a set of JSON values. + +== Syntax + +The `JSON_ARRAY_EXTRACT()` has the following basic syntax. + +[source,sql] +---- +JSON_ARRAY_EXTRACT('json_array'::JSON,id); +---- + +`JSON_ARRAY_EXTRACT()` requires the following parameters: + +* `json_array`: The array to extract. +* `::JSON`: Argument indicating that the query is of type JSON. +* `id`: ID of the element to extract. It is read in an array format that starts with 0. + +=== Another option + +`JSON_ARRAY_EXTRACT` can also be achieved with the `->` operator, as shown in the following syntax: + +[source,sql] +---- +SELECT 'from_json'::JSON -> path; +---- + +* `from_json`: The JSON value from which to extract. +* `::JSON`: A symbol that casts the string literal to a JSON type. +* `path`: Key of the field to extract. + +== Examples + +=== Basic JSON_ARRAY_EXTRACT() function + +. The following example extracts a JSON array as a JSON set. ++ +[source,sql] +---- +SELECT JSON_ARRAY_EXTRACT('["Bougenvile", 2, 12, "Lily"]'::JSON,3); +---- ++ +*or* ++ +[source,sql] +---- +SELECT ('["Bougenvile", 2, 12, "Lily"]'::JSON -> 3); +---- + +. The extracted array will look like the following. ++ +[source,sql] +---- ++------------+ +| f | ++------------+ +| "Lily" | ++------------+ +---- + +=== Extract element of JSON array as text + +. This example extracts the element of the JSON array as text with the `->>` operator. 
++ +[source,sql] +---- +SELECT ('["Bougenvile", 2, 12, "Lily"]'::JSON ->> 1); +---- + +. Output: ++ +[source,sql] +---- ++------------+ +| f | ++------------+ +| 2.000000 | ++------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/json-functions/json-array-length.adoc b/modules/reference/pages/sql/sql-functions/json-functions/json-array-length.adoc new file mode 100644 index 000000000..956f54d3c --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/json-functions/json-array-length.adoc @@ -0,0 +1,78 @@ += JSON_ARRAY_LENGTH +:description: The JSON_ARRAY_LENGTH() function returns the length of a specified JSON array. +:page-topic-type: reference + +The `JSON_ARRAY_LENGTH()` function returns the length of a specified JSON array. + +== Syntax + +This function has the following basic syntax. + +[source,sql] +---- +JSON_ARRAY_LENGTH(arrayval JSON) +---- + +The required argument for this function is `arrayval`. It represents the JSON array for which to count the length. + +== Examples + +=== Get a JSON array length with a JSON value + +The following example returns the number of elements in the array: + +[source,sql] +---- +SELECT JSON_ARRAY_LENGTH('[4, 7, 10, 11, 14, {"vegetables":"spinach","fruits":"melon"}, {"a":"b"}]'); +---- + +This function returns the following result: + +[source,sql] +---- ++-------+ +| f | ++-------+ +| 7 | ++-------+ +---- + +=== Get a JSON array length with a number + +The following example returns the number of elements in the array. + +[source,sql] +---- +SELECT JSON_ARRAY_LENGTH('[1, 2, [3, 4]]'); +---- + +The query returns: + +[source,sql] +---- ++-------+ +| f | ++-------+ +| 3 | ++-------+ +---- + +=== JSON array length where the array is NULL or empty + +This example shows that an empty JSON array returns 0. 
+ +[source,sql] +---- +SELECT JSON_ARRAY_LENGTH('[]'); +---- + +An empty array returns 0: + +[source,sql] +---- ++-------+ +| f | ++-------+ +| 0 | ++-------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/json-functions/json-extract-path-text.adoc b/modules/reference/pages/sql/sql-functions/json-functions/json-extract-path-text.adoc new file mode 100644 index 000000000..17b25dfcf --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/json-functions/json-extract-path-text.adoc @@ -0,0 +1,66 @@ += JSON_EXTRACT_PATH_TEXT +:description: The JSON_EXTRACT_PATH_TEXT() function extracts a nested JSON value from a specified JSON value according to the defined path. +:page-topic-type: reference + +The `JSON_EXTRACT_PATH_TEXT()` function extracts a nested JSON value from a specified JSON value according to the defined path. + +[NOTE] +==== +This function is similar to `JSON_EXTRACT_PATH()`, but it returns a value of type text instead of type JSON. +==== + +== Syntax + +The `JSON_EXTRACT_PATH_TEXT()` syntax is as follows: + +[source,sql] +---- +JSON_EXTRACT_PATH_TEXT(from_json JSON, path TEXT[]) +---- + +The required arguments are: + +* `from_json`: The JSON value from which to extract. +* `path`: The path to extract. + +=== Another option + +Redpanda SQL also provides and supports the use of operators in queries. Here's the syntax: + +[source,sql] +---- +SELECT 'from_json'::JSON ->> 'path'; +---- + +* `from_json`: The JSON value from which to extract. +* `::JSON`: A symbol that casts the text literal to a JSON type. +* `path`: Key of the field to extract. + +== Examples + +. This example shows how to use the `JSON_EXTRACT_PATH_TEXT()` function to extract a value from a JSON object at a specified path.
++ +Run the query: ++ +[source,sql] +---- +SELECT JSON_EXTRACT_PATH_TEXT('{"a": "Oxla", "b": {"x": 1.234, "y": 4.321}}', 'a') AS "result a"; +---- ++ +*or* ++ +[source,sql] +---- +SELECT '{"a": "Oxla", "b": {"x": 1.234, "y": 4.321}}'::JSON ->> 'a' AS "result a"; +---- + +. The `JSON_EXTRACT_PATH_TEXT()` function extracts the values and returns the output: ++ +[source,sql] +---- ++------------+ +| result a | ++------------+ +| Oxla | ++------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/json-functions/json-extract-path.adoc b/modules/reference/pages/sql/sql-functions/json-functions/json-extract-path.adoc new file mode 100644 index 000000000..1d2888640 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/json-functions/json-extract-path.adoc @@ -0,0 +1,79 @@ += JSON_EXTRACT_PATH +:description: JSON_EXTRACT_PATH() function extracts JSON nested value from a specified path. +:page-topic-type: reference + +`JSON_EXTRACT_PATH()` function extracts JSON nested value from a specified path. + +== Syntax + +The syntax of the `JSON_EXTRACT_PATH()` function is: + +[source,sql] +---- +JSON_EXTRACT_PATH(from_json JSON, path TEXT[]) +---- + +* `from_json`: The JSON value from which to extract. +* `path`: The path to extract. + +=== Another option + +Redpanda SQL also provides and supports the use of operators in queries: + +[source,sql] +---- +SELECT 'from_json'::JSON -> 'path'; +---- + +* `from_json`: The JSON value from which to extract. +* `::JSON`: A symbol that casts the text literal to a JSON type. +* `path`: Key of the field to extract. + +== Examples + +These examples display how `JSON_EXTRACT_PATH()` extracts the "`oxla`" JSON sub-object from the specified path. + +. 
Use the query: ++ +[source,sql] +---- +SELECT JSON_EXTRACT_PATH('{"f2":{"f3":1},"f4":{"f5":99,"f6":"oxla"}}', 'f4', 'f6'); +---- ++ +*or* ++ +[source,sql] +---- +SELECT '{"f2":{"f3":1},"f4":{"f5":99,"f6":"oxla"}}'::JSON -> 'f4' -> 'f6'; +---- ++ +The query results: ++ +[source,sql] +---- ++---------+ +| f | ++---------+ +| "oxla" | ++---------+ +---- + +. Run the query: ++ +[source,sql] +---- +SELECT + JSON_EXTRACT_PATH('{"a": 1, "b": {"x": "subtract", "y": "plus"}}', 'b', 'x') AS "bx", + JSON_EXTRACT_PATH('{"a": 1, "b": {"x": "multiply", "y": "divide"}}', 'b', 'y') AS "by"; +---- ++ +The query returns: ++ +[source,sql] +---- ++---------------+-------------+ +| bx | by | ++---------------+-------------+ +| "subtract" | "divide" | ++---------------+-------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/abs.adoc b/modules/reference/pages/sql/sql-functions/math-functions/abs.adoc new file mode 100644 index 000000000..d0b9413f0 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/abs.adoc @@ -0,0 +1,107 @@ += ABS +:description: The ABS() function returns an absolute number (for example, the positive value of a number). +:page-topic-type: reference + +The `ABS()` function returns an absolute number (for example, the positive value of a number). The data type of the returned value will depend on the data type of the value passed to the `ABS()` function. + +== Syntax + +Here's the syntax for the `ABS()`function: + +[source,sql] +---- +ABS(x) +---- + +The `ABS()` function requires one argument: + +* `x`: An expression that evaluates to a number. + +[NOTE] +==== +The *ABS()* function will return the negation of the negative numbers. 
+==== + +== Examples + +=== Absolute value of a negative number + +This example demonstrates how to use the `ABS()` function to obtain the absolute value of a negative number: + +[source,sql] +---- +SELECT ABS(-10.25); +---- + +This returns the absolute value of the passed argument: + +[source,sql] +---- ++--------+ +| f | ++--------+ +| 10.25 | ++--------+ +---- + +=== ABS() function with an expression + +This example demonstrates how to use the `ABS()` function with an expression to obtain the absolute value of the result: + +[source,sql] +---- +SELECT ABS( 100 - 250); +---- + +The expression `100 - 250` evaluates to *-150*, so `ABS()` returns *150*, the absolute value of -150. + +[source,sql] +---- ++------+ +| f | ++------+ +| 150 | ++------+ +---- + +=== Use the ABS() function with a table + +This example demonstrates how to use the `ABS()` function with a table to obtain the absolute values of all numbers in a specific column: + +. First, create a table named absTable containing an *_initialValue_* column with some positive and negative values: ++ +[source,sql] +---- +CREATE TABLE absTable(initialValue float); + +INSERT INTO absTable(initialValue) +VALUES +(550), +(-210), +(72.12), +(-87.93), +(-0.0); +---- + +. Next, use this query to find the absolute value of all numbers: ++ +[source,sql] +---- +SELECT initialValue, ABS(initialValue) AS absoluteValue +FROM absTable; +---- + +. This query retrieves all values in the *"`initialValue`"* column and their absolute values in the *"`absoluteValue`"* column.
The output will look something like this: ++ +[source,sql] +---- ++---------------+----------------+ +| initialValue | absoluteValue | ++---------------+----------------+ +| 550 | 550 | +| -210 | 210 | +| 72.12 | 72.12 | +| -87.93 | 87.93 | +| -0 | 0 | ++---------------+----------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/cbrt.adoc b/modules/reference/pages/sql/sql-functions/math-functions/cbrt.adoc new file mode 100644 index 000000000..c9e65bf73 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/cbrt.adoc @@ -0,0 +1,137 @@ += CBRT +:description: The CBRT() function calculates and returns the cube root of a given number. +:page-topic-type: reference + +The `CBRT()` function calculates and returns the cube root of a given number. In mathematical terms, for a number _x_, its cube root _y_ is determined by the equation _y³ = x_. + +== Syntax + +The syntax for the `CBRT()` function is: + +[source,sql] +---- +CBRT(number) +---- + +Where: + +* `number`: A required value representing the number for which to calculate the cube root. It can be a positive or negative whole number, a decimal, or even an expression that evaluates to a number. + +For example, expressions like `SELECT CBRT(some_column) from test_table`, assuming `some_column` contains a numeric value. + +[NOTE] +==== +*Return Value:* + - Returns `NULL` if the argument is `NULL`. + - Returns an error if the input parameter is not a numeric type. 
+==== + +== Examples + +These examples show how to use the `CBRT()` function: + +=== Basic cube root calculation + +Consider the example: + +[source,sql] +---- + SELECT CBRT(125); +---- + +The result of this query will be: + +[source,sql] +---- + cbrt +------ + 5 +---- + +=== Cube root of a negative value + +To calculate the cube root of a negative number, use the `CBRT()` function as shown: + +[source,sql] +---- + SELECT CBRT(-125); +---- + +Result: + +[source,sql] +---- + cbrt +------ + -5 +---- + +=== Cube root of decimal result + +For calculations with decimal numbers: + +[source,sql] +---- +SELECT CBRT(32); +---- + +The result is a decimal value: + +[source,sql] +---- + cbrt +------------------- + 3.174802103936399 +---- + +=== Cube root of decimal input + +In this scenario, the argument is a decimal number: + +[source,sql] +---- +SELECT CBRT(0.12815); +---- + +The result will be the cube root of the provided decimal value. + +[source,sql] +---- + cbrt +------------ + 0.50416523 +---- + +=== Handle incorrect argument + +When a non-numeric argument is provided, the `CBRT()` function works as shown here: + +[source,sql] +---- +SELECT CBRT('abc'); +---- + +The function returns an error and the result is not valid. + +[source,sql] +---- +invalid input syntax for type double precision: "abc" +---- + +=== CBRT operator (`||/(x)`) + +This example uses the CBRT operator (`||/(x)`) to calculate the cube root of a number: + +[source,sql] +---- +SELECT ||/(1728) AS cbrt_operator; +---- + +This example calculates the cube root of 1728 using the CBRT operator.
The result of this query will be: + +[source,sql] +---- + cbrt_operator +-------------------- + 12.000000000000002 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/ceil.adoc b/modules/reference/pages/sql/sql-functions/math-functions/ceil.adoc new file mode 100644 index 000000000..49fd4a73f --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/ceil.adoc @@ -0,0 +1,106 @@ += CEIL +:description: The CEIL() function returns the nearest positive or negative integer value greater than or equal to the provided decimal input number. +:page-topic-type: reference + +The `CEIL()` function returns the nearest positive or negative integer value greater than or equal to the provided decimal input number. + +== Syntax + +The syntax of the `CEIL()` function is: + +[source,sql] +---- +CEIL(x) +---- + +The `CEIL()` function requires one argument: + +* `x`: A positive or a negative decimal number (or an expression that evaluates to a decimal number). + +== Examples + +=== Round up a positive decimal value + +This example demonstrates how the `CEIL()` function rounds up a positive decimal value: + +[source,sql] +---- +SELECT CEIL (300.55); +---- + +This returns 301, as it is the nearest integer value greater than 300.55. + +[source,sql] +---- ++------+ +| f | ++------+ +| 301 | ++------+ +---- + +=== Round up a negative decimal value + +This example demonstrates how the `CEIL()` function rounds up a negative decimal value: + +[source,sql] +---- +SELECT CEIL(-89.9) AS "Ceil"; +---- + +The output of this statement is -89, as -89 is the nearest integer value greater than or equal to -89.9. + +[source,sql] +---- ++-------+ +| Ceil | ++-------+ +| -89 | ++-------+ +---- + +=== Use the `CEIL()` function with a table + +This example demonstrates how to use the `CEIL()` function with a table to round up values in a specific column: + +. 
First, create a table called *_CeilRecords_*: ++ +[source,sql] +---- +CREATE TABLE CeilRecords (numbers float); + +INSERT INTO CeilRecords(numbers) +VALUES + (-28.85), + (-9.4), + (0.87), + (78.16), + (42.16); +---- ++ +This statement creates a table called *"`CeilRecords`"* with a column called *"`numbers`"* and insert 5 decimal values into it. + +. Retrieve and round up the value for all records in the *numbers* column: ++ +[source,sql] +---- +SELECT *, CEIL(numbers) AS CeilValue FROM CeilRecords; +---- ++ +The final result will contain: ++ +* A *numbers* column with initial decimal values. +* A *CeilValue* column with rounded-up integer values. ++ +[source,sql] +---- ++---------+------------+ +| numbers | CeilValue | ++---------+------------+ +| -28.85 | -28 | +| -9.4 | -9 | +| 0.87 | 1 | +| 78.16 | 79 | +| 42.16 | 43 | ++---------+------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/cosh.adoc b/modules/reference/pages/sql/sql-functions/math-functions/cosh.adoc new file mode 100644 index 000000000..357143aaa --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/cosh.adoc @@ -0,0 +1,69 @@ += COSH +:description: The COSH() function returns the hyperbolic cosine of a specified numeric argument. +:page-topic-type: reference + +The `COSH()` function returns the hyperbolic cosine of a specified numeric argument. + +== Syntax + +[source,sql] +---- +COSH(x) +---- + +== Arguments + +* `x`: A positive or negative number of type `real` or `double precision`. + +== Return type + +Same as the input type (`real` or `double precision`). 
+ +== Examples + +=== Positive value + +[source,sql] +---- +SELECT COSH(1); +---- + +[source,sql] +---- + cosh +-------------------- + 1.5430806348152437 +(1 row) +---- + +=== Zero + +[source,sql] +---- +SELECT COSH(0); +---- + +[source,sql] +---- + cosh +------ + 1 +(1 row) +---- + +=== Negative value + +`COSH` is symmetric, so `COSH(-x)` returns the same result as `COSH(x)`: + +[source,sql] +---- +SELECT COSH(-1); +---- + +[source,sql] +---- + cosh +-------------------- + 1.5430806348152437 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/exp.adoc b/modules/reference/pages/sql/sql-functions/math-functions/exp.adoc new file mode 100644 index 000000000..c4712341b --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/exp.adoc @@ -0,0 +1,80 @@ += EXP +:description: The EXP() function returns the exponential value of a number specified in the argument. +:page-topic-type: reference + +The `EXP()` function returns the exponential value of a number specified in the argument. + +== Syntax + +The syntax for the `EXP()` is: + +[source,sql] +---- +EXP(number); +---- + +Where: + +* `number`: The number for which to calculate the exponential value. Equivalent to the formula `e^number`. + +== Examples + +The examples here show how the `EXP()` function works. + +=== Basic usage + +This example uses the `EXP()` function with positive and negative values. + +[source,sql] +---- +SELECT EXP(0) AS "EXP of 0", + EXP(1) AS "EXP of 1", + EXP(2) AS "EXP of 2", + EXP(-1) AS "EXP of -1", + EXP(-2) AS "EXP of -2"; +---- + +The query returns: + +[source,sql] +---- +EXP of 0 | EXP of 1 | EXP of 2 | EXP of -1 | EXP of -2 +----------+-------------------+------------------+---------------------+-------------------- + 1 | 2.718281828459045 | 7.38905609893065 | 0.36787944117144233 | 0.1353352832366127 +---- + +=== Use `EXP()` with fractions + +This case uses the `EXP()` function with a fractional argument. 
+ +[source,sql] +---- +SELECT EXP(3.2); +---- + +Here is the result: + +[source,sql] +---- + exp +-------------------- + 24.532531366911574 +---- + +=== Use `EXP()` with expressions + +This example uses the `EXP()` function with expressions. + +[source,sql] +---- +SELECT EXP(5 * 5); +---- + +The query returns: + +[source,sql] +---- + exp +------------------- + 72004899337.38588 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/floor.adoc b/modules/reference/pages/sql/sql-functions/math-functions/floor.adoc new file mode 100644 index 000000000..c25092e54 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/floor.adoc @@ -0,0 +1,96 @@ += FLOOR +:description: The FLOOR() function returns a number rounded down to the nearest integer that is less than or equal to the specified argument. +:page-topic-type: reference + +The `FLOOR()` function returns a number rounded down to the nearest integer that is less than or equal to the specified argument. + +== Syntax + +The syntax for the `FLOOR()` function in Redpanda SQL is: + +[source,sql] +---- +FLOOR(x) +---- + +The `FLOOR()` function requires one argument: + +* `x`: A positive or a negative decimal number (or an expression that evaluates to a decimal number). + +== Examples + +=== Round down a positive decimal value + +This example demonstrates how the `FLOOR()` function rounds down a positive decimal value: + +[source,sql] +---- +SELECT FLOOR(345.6765467); +---- + +This returns 345, as it is the nearest integer less than or equal to the argument. + +[source,sql] +---- ++------+ +| f    | ++------+ +| 345  | ++------+ +---- + +=== Round down a negative decimal value + +This example demonstrates how the `FLOOR()` function rounds down a negative decimal value: + +[source,sql] +---- +SELECT FLOOR(-0.987657); +---- + +The result is the nearest integer smaller than or equal to the specified argument.
+ +[source,sql] +---- ++-------+ +| f | ++-------+ +| -1    | ++-------+ +---- + +=== Use the FLOOR() function with a table + +This example demonstrates how to use the `FLOOR()` function with a table to round down values in a specific column: + +. Create a new table called *FloorRecords* with double-precision values: ++ +[source,sql] +---- +CREATE TABLE FloorRecords (numbers float); +INSERT INTO FloorRecords VALUES (3.987), (4.325), (-0.76), (-22.57); +---- + +. Retrieve the table with its values: ++ +[source,sql] +---- +SELECT * ,FLOOR(numbers) AS Floorvalue FROM FloorRecords; +---- + +. Result: ++ +* *numbers,* the column with the initial double-precision values. +* *FloorValue*, the column with the rounded-down values. ++ +[source,sql] +---- ++------------+---------------+ +| numbers | Floorvalue | ++------------+---------------+ +| 3.987 | 3 | +| 4.325 | 4 | +| -0.76 | -1 | +| -22.57 | -23 | ++------------+---------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/greatest.adoc b/modules/reference/pages/sql/sql-functions/math-functions/greatest.adoc new file mode 100644 index 000000000..464f0fc0c --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/greatest.adoc @@ -0,0 +1,159 @@ += GREATEST +:description: The GREATEST() function extracts the greatest or largest value from a set of values. +:page-topic-type: reference + +The `GREATEST()` function returns the greatest value from a set of values. The arguments must be of compatible types. For example, comparing a text value with a number returns an error. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +GREATEST(value_1, [value_n]) +---- + +Where: + +* `value_1`: Represents the first value. +* `value_n`: Represents one or more additional values, separated by commas. + +[NOTE] +==== +* `NULL` values within the expressions are ignored. +* The result is `NULL` if all expressions evaluate to `NULL`. 
+==== + +== Examples + +=== Basic usage + +[source,sql] +---- +SELECT GREATEST(3,5,8,9,10); +---- + +The query returns `10`, the greatest value among the provided values. + +[source,sql] +---- +greatest +--------- + 10 +---- + +=== String comparison + +String comparison is also supported: + +[source,sql] +---- +SELECT GREATEST('apple', 'banana', 'cherry'); +---- + +The query returns `'cherry'`, the greatest string in lexicographic order. + +[source,sql] +---- +greatest +---------- + cherry +---- + +=== Handle NULL values + +`NULL` values are ignored when determining the greatest value: + +[source,sql] +---- +SELECT GREATEST (5,null,9); +---- + +The query returns `9`, the greatest non-NULL value. + +[source,sql] +---- +greatest +---------- + 9 +---- + +=== Positive and negative numbers + +Negative numbers can also be compared: + +[source,sql] +---- +SELECT GREATEST (4,-4,-8,8); +---- + +The query returns `8`, the greatest value among the provided numbers. + +[source,sql] +---- +greatest +---------- + 8 +---- + +=== Use table data + +You can also use `GREATEST()` to find the greatest value across columns. For example, create a table named `Student` that stores student names and scores. 
+ +[source,sql] +---- +CREATE TABLE Student( + Student_name TEXT, + Student_Class TEXT, + Subject1 INT, + Subject2 INT, + Subject3 INT, + Subject4 INT +); + +INSERT INTO + Student(Student_name, Student_Class, Subject1, Subject2, Subject3, Subject4) +VALUES + ('Sayan', 'Junior', 81, 90, 86, 92 ), + ('Nitin', 'Junior', 90, 84, 88, 91 ), + ('Aniket', 'Senior', 81, 80, 87, 95 ), + ('Abdur', 'Junior', 85, 90, 80, 90 ), + ('Sanjoy', 'Senior', 88, 82, 84, 90 ); +---- + +Use the `SELECT` statement to view all the records: + +[source,sql] +---- +SELECT * FROM Student; +---- + +[source,sql] +---- +student_name | student_class | subject1 | subject2 | subject3 | subject4 +--------------+---------------+----------+----------+----------+---------- + Sayan | Junior | 81 | 90 | 86 | 92 + Nitin | Junior | 90 | 84 | 88 | 91 + Aniket | Senior | 81 | 80 | 87 | 95 + Abdur | Junior | 85 | 90 | 80 | 90 + Sanjoy | Senior | 88 | 82 | 84 | 90 +---- + +Find the greatest mark for each student across all subjects: + +[source,sql] +---- +SELECT Student_name, GREATEST(Subject1, Subject2, Subject3, Subject4) AS Greatest_Mark +FROM Student; +---- + +[source,sql] +---- +student_name | greatest_mark +--------------+--------------- + Sayan | 92 + Nitin | 91 + Aniket | 95 + Abdur | 90 + Sanjoy | 90 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/index.adoc b/modules/reference/pages/sql/sql-functions/math-functions/index.adoc new file mode 100644 index 000000000..ef1a60eec --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/index.adoc @@ -0,0 +1,61 @@ += Overview +:description: Mathematical, trigonometric, and hyperbolic functions in Redpanda SQL are designed to perform mathematical calculations and manipulate numeric values. +:page-topic-type: reference + +Mathematical, trigonometric, and hyperbolic functions in Redpanda SQL are designed to perform mathematical calculations and manipulate numeric values.
+ +== Mathematical functions + +[width="100%",cols="47%,53%",options="header",] +|=== +|Function |Description +|xref:reference:sql/sql-functions/math-functions/abs.adoc[ABS()] |This function returns the absolute value of an argument, regardless of whether it is positive or negative +|xref:reference:sql/sql-functions/math-functions/cbrt.adoc[CBRT()] |This function returns the cube root of a given number +|xref:reference:sql/sql-functions/math-functions/ceil.adoc[CEIL()] |This function rounds up to the nearest positive or negative integer value greater than or equal to the argument +|xref:reference:sql/sql-functions/math-functions/exp.adoc[EXP()] |This function returns the exponential value of a number specified in the argument +|xref:reference:sql/sql-functions/math-functions/floor.adoc[FLOOR()] |This function returns a number rounded down that is less than or equal to the specified argument +|xref:reference:sql/sql-functions/math-functions/greatest.adoc[GREATEST()] |This function extracts the greatest or largest value from a set of values. 
+|xref:reference:sql/sql-functions/math-functions/least.adoc[LEAST()] |This function returns the least or smallest value in a list of values +|xref:reference:sql/sql-functions/math-functions/ln.adoc[LN()] |This function returns the natural logarithm of its argument +|xref:reference:sql/sql-functions/math-functions/log.adoc[LOG()] |This function returns the base-10 logarithm of a given number, or its logarithm to a specified base +|xref:reference:sql/sql-functions/math-functions/power.adoc[POWER()] |This function returns the value of a number raised to the power of another number specified in the arguments +|xref:reference:sql/sql-functions/math-functions/random.adoc[RANDOM()] |This function returns a random number between 0 (inclusive) and 1 (exclusive) +|xref:reference:sql/sql-functions/math-functions/round.adoc[ROUND()] |This function rounds numbers to the nearest integer or to a specified number of decimal places +|xref:reference:sql/sql-functions/math-functions/sign.adoc[SIGN()] |This function returns -1 for negative arguments, 1 for positive arguments or 0 if the argument is 0 +|xref:reference:sql/sql-functions/math-functions/sqrt.adoc[SQRT()] |This function returns the square root of its argument +|xref:reference:sql/sql-functions/math-functions/to-char-from-number.adoc[TO_CHAR() from Number] |Formats a number into a string using a given format +|=== + +== Trigonometric functions + +These trigonometric functions take arguments and return values of type `double precision` and `real`. + +[width="100%",cols="6%,32%,26%,36%",options="header",] +|=== +|*Function* |*Description* |*Syntax* |*Example* +|`acos` |Calculates the inverse cosine of a given argument, where the output is expressed in radians. |`acos(argument)` |`select acos(1);` Returns: `0` +|`acosd` |Calculates the inverse cosine of a given argument, where the output is expressed in degrees.
|`acosd(argument)` |`select acosd(0.5);` Returns: `60` +|`asin` |Calculates the inverse sine of a given argument, where the output is expressed in radians. |`asin(argument)` |`select asin(1);` Returns: `1.5707963267948966` +|`asind` |Calculates the inverse sine of a given argument, where the output is expressed in degrees. |`asind(argument)` |`select asind(0.5);` Returns: `30` +|`atan` |Calculates the inverse tangent of a given argument, where the output is expressed in radians. |`atan(argument)` |`select atan(1);` Returns: `0.7853965` +|`atand` |Calculates the inverse tangent of a given argument, where the output is expressed in degrees. |`atand(argument)` |`select atand(1);` Returns: `44.99990469434657` +|`atan2` |Calculates the inverse tangent of y/x, where the output is expressed in radians. |`atan2(y_value, x_value)` `y_value` and `x_value` are in double precision type. |`select atan2(1, 0);` Returns: `1.5707963267948966` +|`atan2d` |Calculates the inverse tangent of y/x, where the output is expressed in degrees. |`atan2d(y_value, x_value)` `y_value` and `x_value` are in double precision type. |`select atan2d(1, 0);` Returns: `90` +|`cos` |Calculates the cosine of a given argument, where the argument is in radians. |`cos(argument)` |`select cos(0);` Returns: `1` +|`cosd` |Calculates the cosine of a given argument, where the argument is in degrees. |`cosd(argument)` |`select cosd(60);` Returns: `0.5000000000000001` +|`cot` |Calculates the cotangent of a given argument, where the argument is in radians. |`cot(argument)` |`select cot(0.5);` Returns: `1.8304877` +|`cotd` |Calculates the cotangent of a given argument, where the argument is in degrees. |`cotd(argument)` |`select cotd(45);` Returns: `1.0000000000000002` +|xref:reference:sql/sql-functions/math-functions/sin.adoc[sin] |Calculates the sine of a given argument, where the argument is in radians. 
|`sin(argument)` |`select sin(1);` Returns: `0.8414709848078965` +|`sind` |Calculates the sine of a given argument, where the argument is in degrees. |`sind(argument)` |`select sind(30);` Returns: `0.49999999999999994` +|`tan` |Calculates the tangent of a given argument, where the argument is in radians. |`tan(argument)` |`select tan(1);` Returns: `1.5574077246549023` +|`tand` |Calculates the tangent of a given argument, where the argument is in degrees. |`tand(argument)` |`select tand(45);` Returns: `0.9999999999999999` +|=== + +== Hyperbolic functions + +[width="100%",cols="6%,32%,26%,36%",options="header",] +|=== +|*Function* |*Description* |*Syntax* |*Example* +|xref:reference:sql/sql-functions/math-functions/cosh.adoc[cosh] |Calculates the hyperbolic cosine of a given argument. |`cosh(argument)` |`select cosh(1);` Returns: `1.5430806348152437` +|xref:reference:sql/sql-functions/math-functions/sinh.adoc[sinh] |Calculates the hyperbolic sine of a given argument. |`sinh(argument)` |`select sinh(1);` Returns: `1.1752011936438014` +|=== diff --git a/modules/reference/pages/sql/sql-functions/math-functions/least.adoc b/modules/reference/pages/sql/sql-functions/math-functions/least.adoc new file mode 100644 index 000000000..3e4f97964 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/least.adoc @@ -0,0 +1,146 @@ += LEAST +:description: The LEAST() function returns the least or smallest value in a list of values. +:page-topic-type: reference + +The `LEAST()` function returns the least or smallest value in a list of values. It needs at least one argument to work with. If different types are mixed, like a text and a number, it returns an error. + +For example, finding the smallest value among 4, "`two`", and 9 results in an error. + +== Syntax + +The syntax for the `LEAST()` function is: + +[source,sql] +---- +LEAST(value_1, [value_n]) +---- + +Where: + +* `value_1`: Represents the first value.
+* `value_n`: Represents one or more additional values, separated by commas. + +[NOTE] +==== +*Info:* + * `NULL` values in the list are ignored. + - The result will be `NULL` if all the expressions evaluate to `NULL`. +==== + +== Examples + +These examples show how to use the `LEAST()` function: + +=== Basic usage + +Consider the example: + +[source,sql] +---- +SELECT LEAST(3,5,8,9,10); +---- + +The query will return `3`, the smallest value among the provided values. + +[source,sql] +---- + least +------- + 3 +---- + +=== String comparison + +String comparison is also supported: + +[source,sql] +---- +SELECT LEAST('a','b','c','aa'); +---- + +In this case, the result will be `'a'`, as it is the smallest string. + +[source,sql] +---- + least +------- + a +---- + +=== Handle NULL values + +`NULL` values are ignored when determining the smallest value: + +[source,sql] +---- +SELECT LEAST (5,null,9); +---- + +The result will be the smallest non-NULL value, which is `5`. + +[source,sql] +---- + least +------- + 5 +---- + +=== Negative numbers + +Negative numbers can also be compared: + +[source,sql] +---- +SELECT LEAST (4,-4,-8,8); +---- + +This query will return `-8`, the smallest value among the provided numbers. + +[source,sql] +---- + least +------- + -8 +---- + +=== Use table data + +Suppose there is a table named `grades` containing columns `x`, `y`, and `z`. + +[source,sql] +---- +CREATE TABLE grades ( + name TEXT, + x INT, + y INT, + z INT +); + +INSERT INTO grades (name, x, y, z) +VALUES + ('Jane', 50, 0, 70), + ('Rio', 60, 30, 80), + ('John', 60, 60, 86), + ('Rose', 80, 90, 88), + ('Gary', 100, 80, 90); +---- + +To find the smallest value among these columns, use the query: + +[source,sql] +---- +SELECT *, LEAST(x, y, z) AS least_grade FROM grades; +---- + +This query will add a new column named `least_grade` to the result, displaying the smallest value among columns `x`, `y`, and `z`. 
+ +[source,sql] +---- + name | x | y | z | least_grade +------+-----+----+----+------------- + Jane | 50 | 0 | 70 | 0 + Rio | 60 | 30 | 80 | 30 + John | 60 | 60 | 86 | 60 + Rose | 80 | 90 | 88 | 80 + Gary | 100 | 80 | 90 | 80 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/ln.adoc b/modules/reference/pages/sql/sql-functions/math-functions/ln.adoc new file mode 100644 index 000000000..38d815873 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/ln.adoc @@ -0,0 +1,79 @@ += LN +:description: The LN() function returns the natural logarithm of its argument. +:page-topic-type: reference + +The `LN()` function returns the natural logarithm of its argument. + +[NOTE] +==== +`LN()` does not accept negative numbers or zero. +==== + +== Syntax + +The syntax of the `LN()` function is: + +[source,sql] +---- +LN (x) +---- + +`x`: A positive number, or an expression that evaluates to a positive number. + +== Examples + +=== Basic LN() function + +The following example returns the natural logarithm of `7.87653`: + +[source,sql] +---- +SELECT LN(7.87653); +---- + +The query returns: + +[source,sql] +---- ++-------------+ +| f | ++-------------+ +| 2.0638874 | ++-------------+ +---- + +=== Use LN() function with a table + +This example combines the `LN()` function with a `CREATE TABLE` statement to obtain natural logarithmic values of a specific column: + +. Create a new table named `LNtable` with an integer `initValue` column. ++ +[source,sql] +---- +CREATE TABLE LNtable(initValue int); +INSERT INTO LNtable(initValue) +VALUES (75), (18), (28); +---- + +. Run this query to get the logarithm output of the column: ++ +[source,sql] +---- +SELECT * ,LN(initValue) AS lnValue FROM LNtable; +---- + +. The query returns the initial value and its natural logarithm: ++ +* `initValue`: The original integer values. +* `lnValue`: The natural logarithm values. 
+ +[source,sql] +---- ++------------+---------------------------+ +| initValue | lnValue | ++------------+---------------------------+ +| 75 | 4.31748811353631 | +| 18 | 2.8903717578961645 | +| 28 | 3.332204510175204 | ++------------+---------------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/log.adoc b/modules/reference/pages/sql/sql-functions/math-functions/log.adoc new file mode 100644 index 000000000..f653edec0 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/log.adoc @@ -0,0 +1,173 @@ += LOG +:description: The LOG() function returns the base-10 logarithm or logarithm of the specified base of a given number. +:page-topic-type: reference + +The `LOG()` function returns the base-10 logarithm or logarithm of the specified base of a given number. + +== Syntax + +This example illustrates the syntax of the `LOG()` function: + +[source,sql] +---- +-- base-10 logarithm +LOG(number) + +-- logarithm of number in the specified base +LOG(base, number) +---- + +Where: + +* `base`: The base number. It must be greater than 0 and not equal to 1. +* `number`: The number whose logarithm is calculated. It must be greater than 0. + +== Examples +=== Base-10 logarithm + +==== Basic usage + +In this case, the `LOG()` function calculates the base-10 logarithm of a specified number. + +[source,sql] +---- +SELECT LOG(2), LOG(2.5); +---- + +The query returns: + +[source,sql] +---- + log | log +--------------------+--------- + 0.3010299956639812 | 0.39794 +---- + +==== Negative value + +In this example, the `LOG()` function is applied to negative numbers. + +[source,sql] +---- +SELECT LOG(-1); +---- + +Any input of negative values returns a `NaN` result. + +[source,sql] +---- + log +----- + NaN +---- + +==== NULL value + +The `LOG()` function will return `NULL` if the argument is `NULL`. + +[source,sql] +---- +SELECT LOG(NULL); +---- + +A null result is returned when an argument passed is null.
+ +[source,sql] +---- + log +----- +---- + +==== Zero value + +In this example, the `LOG()` function takes zero as an argument. + +[source,sql] +---- +SELECT LOG(0); +---- + +The query returns: + +[source,sql] +---- + log +----------- + -Infinity +---- + +=== Logarithm with custom base + +==== Basic usage + +In this case, the `LOG()` function calculates the logarithm of a specified number using a specified base. + +[source,sql] +---- +SELECT LOG(4, 16), + LOG(0.7, 0.8), + LOG(0.5, 10), + LOG(1, NULL); +---- + +The query returns: + +[source,sql] +---- + log | log | log | log +-----+------------+-----------+----- + 2 | 0.62562156 | -3.321928 | +---- + +==== Use with a table + +Consider a database table called *_data_* with the records: + +[source,sql] +---- +CREATE TABLE data ( + data_column TEXT, + x REAL, + y REAL +); + +INSERT INTO data (data_column, x, y) VALUES +('Data 1', 0.5, 2), +('Data 2', 1, 2), +('Data 3', 5, 2), +('Data 4', 10, 10), +('Data 5', 50, 10); + +SELECT * FROM data; +---- + +[source,sql] +---- + data_column | x | y +-------------+-----+---- + Data 1 | 0.5 | 2 + Data 2 | 1 | 2 + Data 3 | 5 | 2 + Data 4 | 10 | 10 + Data 5 | 50 | 10 +---- + +Use the `LOG()` function to calculate the logarithm of column *_x_* (as the number) using column *_y_* (as the base): + +[source,sql] +---- +SELECT *, LOG(y, x) AS LOG_Value FROM data; +---- + +The query returns: + +[source,sql] +---- + data_column | x | y | log_value +-------------+-----+----+----------- + Data 1 | 0.5 | 2 | -1 + Data 2 | 1 | 2 | 0 + Data 3 | 5 | 2 | 2.321928 + Data 4 | 10 | 10 | 1 + Data 5 | 50 | 10 | 1.69897 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/power.adoc b/modules/reference/pages/sql/sql-functions/math-functions/power.adoc new file mode 100644 index 000000000..8ed02bdb1 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/power.adoc @@ -0,0 +1,93 @@ += POWER +:description: The POWER() function calculates the value of a number raised to the power of another number
specified in the arguments. +:page-topic-type: reference + +The `POWER()` function calculates the value of a number raised to the power of another number specified in the arguments. + +== Syntax + +This example shows the syntax of the `POWER()` function: + +[source,sql] +---- +POWER(a,b) +---- + +Where: + +* `a`: The base number. +* `b`: The exponent to which the base number is raised. + +== Examples +=== Basic usage + +In this case, the `POWER()` function calculates the result of raising one number to the power of another. + +[source,sql] +---- +SELECT POWER(3, 4) AS "Example 1", + POWER(7, 3) AS "Example 2"; +---- + +The query returns: + +[source,sql] +---- + Example 1 | Example 2 +-----------+----------- + 81 | 343 +---- + +=== Use `POWER()` with negative values + +In this case, the `POWER()` function is applied to negative numbers. + +[source,sql] +---- +SELECT POWER(-4, -5), POWER(-1, -2), POWER(-6, -7); +---- + +The query returns: + +[source,sql] +---- + power | power | power +-------+-------+------- + -1024 | 1 | 0 +---- + +=== Use `POWER()` with floating-point numbers + +In this example, use the `POWER()` function to calculate 2.5 raised to the power of 3.0. + +[source,sql] +---- +SELECT POWER(2.5, 3.0) AS power_result; +---- + +The result, 15.625, is the value obtained by raising 2.5 to the third power. + +[source,sql] +---- + power_result +-------------- + 15.625 +---- + +=== Zero to the power of zero + +This case shows that 0 expression raised to the power of 0 returns 1. 
+ +[source,sql] +---- +SELECT POWER(0, 0); +---- + +The query returns: + +[source,sql] +---- + power +------- + 1 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/random.adoc b/modules/reference/pages/sql/sql-functions/math-functions/random.adoc new file mode 100644 index 000000000..74e96b67a --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/random.adoc @@ -0,0 +1,82 @@ += RANDOM +:description: The RANDOM() function in Redpanda SQL generates a random number within a defined range. +:page-topic-type: reference + +The `RANDOM()` function in Redpanda SQL generates a random number within a defined range. By default, the range is between 0 (inclusive) and 1 (exclusive), resulting in a value greater than or equal to 0 and less than 1. + +== Syntax + +The syntax for generating a random integer or floating-point number using the `RANDOM()` function is: + +[source,sql] +---- +RANDOM() +---- + +[NOTE] +==== +There are no parameters or arguments for the `RANDOM()` function. +==== + +== Examples + +=== Generate a random number + +The `RANDOM()` function generates a random number greater than or equal to zero but less than one by default. Use this syntax to retrieve a random number: + +[source,sql] +---- +SELECT RANDOM(); +---- + +The result is a random number greater than or equal to 0 and less than 1. It never returns the maximum value of 1. + +[source,sql] +---- ++-----------------------+ +| f | ++-----------------------+ +| 0.9122627193276355 | ++-----------------------+ +---- + +=== Generate a random decimal number within a range + +To generate a random decimal number between two values: + +[source,sql] +---- +SELECT RANDOM()*(b-a)+a; +---- + +Where: + +* `a` represents the lower bound of the range. +* `b` represents the upper bound of the range. + +The return value will be a random floating-point number greater than or equal to `a` and less than `b`.
+ +*Example* + +To generate a random decimal number greater than or equal to 10 and less than 25: + +[source,sql] +---- +SELECT RANDOM()*(25 - 10)+10; +---- + +This example shows how to retrieve a random number: + +[source,sql] +---- ++-----------------------+ +| f | ++-----------------------+ +| 18.156098711616043 | ++-----------------------+ +---- + +[WARNING] +==== +It is important to note that the function will never return the maximum value of b. +==== diff --git a/modules/reference/pages/sql/sql-functions/math-functions/round.adoc b/modules/reference/pages/sql/sql-functions/math-functions/round.adoc new file mode 100644 index 000000000..fd8d1bec7 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/round.adoc @@ -0,0 +1,63 @@ += ROUND +:description: The ROUND() function rounds numbers using round half to even method (bankers rounding). +:page-topic-type: reference + +The `ROUND()` function rounds numbers using round half to even method (bankers rounding). + +== Syntax + +[source,sql] +---- +ROUND(number) +ROUND(number, scale) +---- + +== Arguments + +* `number`: The number to round. It can be positive, negative, or zero, and it can be an xref:reference:sql/sql-data-types/numeric-type/numeric.adoc[Integer] or a xref:reference:sql/sql-data-types/numeric-type/numeric.adoc[Double Precision]. +* `scale`: Optional. An integer specifying the number of decimal places to round to. When omitted, the function rounds to the nearest integer. A negative scale rounds to the left of the decimal point (for example, `ROUND(1234, -2)` returns `1200`). 
+ +== Examples + +=== Round to integer + +In this example, the function rounds decimal numbers to integers: + +[source,sql] +---- +SELECT + round(28.11) AS "round(28.11)", + round(12.51) AS "round(12.51)", + round(-9.11) AS "round(-9.11)", + round(102.5) AS "round(102.5)", + round(101.5) AS "round(101.5)", + round(-40.51) AS "round(-40.51)"; +---- + +The query will return the nearest integer for all provided values. + +[source,sql] +---- + round(28.11) | round(12.51) | round(-9.11) | round(102.5) | round(101.5) | round(-40.51) +--------------+--------------+--------------+--------------+---------------+--------------- + 28 | 13 | -9 | 102 | 102 | -41 +---- + +=== Round to a specific number of decimal places + +Use the two-argument form to specify the number of decimal places: + +[source,sql] +---- +SELECT + round(3.14159, 2) AS "round(3.14159, 2)", + round(123.456, 1) AS "round(123.456, 1)", + round(99.995, 2) AS "round(99.995, 2)"; +---- + +[source,sql] +---- + round(3.14159, 2) | round(123.456, 1) | round(99.995, 2) +-------------------+-------------------+------------------ + 3.14 | 123.5 | 100 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/sign.adoc b/modules/reference/pages/sql/sql-functions/math-functions/sign.adoc new file mode 100644 index 000000000..832285e5b --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/sign.adoc @@ -0,0 +1,105 @@ += SIGN +:description: The SIGN() function returns a sign of an argument. +:page-topic-type: reference + +The `SIGN()` function returns a sign of an argument. The returned values are -1 if the argument is less than zero, 1 if the argument is greater than zero, 0 if the argument is equal to zero. + +== Syntax + +The syntax for the `SIGN()` function is: + +[source,sql] +---- +SIGN(x) +---- + +The `SIGN()` function requires one argument: + +* `x`: An expression that evaluates to a number. 
+ +== Examples + +=== Sign of a number + +This example demonstrates how the `SIGN()` function can be used to obtain the sign of a number: + +[source,sql] +---- +SELECT + SIGN(0.1) AS "SIGN(0.1)", + SIGN(999) AS "SIGN(999)", + SIGN(0) AS "SIGN(0)", + SIGN(-0) AS "SIGN(-0)"; +---- + +The query will return the signs of the passed arguments: + +[source,sql] +---- + SIGN(0.1) | SIGN(999) | SIGN(0) | SIGN(-0) +-----------+-----------+---------+---------- + 1 | 1 | 0 | 0 +---- + +Note: `-0` is accepted as an argument and is treated as zero. + +=== SIGN() function with an expression + +This example demonstrates how to use the `SIGN()` function with an expression: + +[source,sql] +---- +SELECT SIGN(100 - 200); +---- + +The query returns the sign of the evaluated expression: + +[source,sql] +---- + sign +------ + -1 +------ +---- + +=== Use the SIGN() function with a table + +This example demonstrates how to use the `SIGN()` function with a table to obtain the signs of all numbers in a specific column: + +. Create a table named `signTable` containing a *_value_* column with positive, negative, and zero values: ++ +[source,sql] +---- +CREATE TABLE signTable(value float); + +INSERT INTO signTable(value) +VALUES +(1000), +(-200), +(0), +(0.22), +(-12.3), +(-0.0); +---- + +. Use this query to find the sign of all inserted values: ++ +[source,sql] +---- +SELECT value, SIGN(value) AS sign +FROM signTable; +---- + +. Result: ++ +[source,sql] +---- + value | sign +-------+------ + 1000 | 1 + -200 | -1 + 0 | 0 + 0.22 | 1 + -12.3 | -1 + -0 | 0 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/sin.adoc b/modules/reference/pages/sql/sql-functions/math-functions/sin.adoc new file mode 100644 index 000000000..a2020100d --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/sin.adoc @@ -0,0 +1,134 @@ += SIN +:description: SIN() is a numeric function that returns the trigonometric sine value of a specified angle in radians.
+:page-topic-type: reference + +`SIN()` is a numeric function that returns the trigonometric sine value of a specified angle in radians. + +== Syntax + +The syntax of the `SIN()` function is: + +[source,sql] +---- +SIN (x) +---- + +The `SIN()` function requires one argument: + +`x`: A positive or a negative angle (or an expression that evaluates to an angle). + +== Examples + +=== Sine a positive value + +This example uses the `SIN()` function with a positive angle as the argument. + +[source,sql] +---- +SELECT SIN(5); +---- + +This returns the sine value of 5. + +[source,sql] +---- ++-----------------------+ +| f | ++-----------------------+ +| -0.9589242746631385 | ++-----------------------+ +---- + +=== Sine a negative value + +This example shows the `SIN()` function with a negative angle as the argument: + +[source,sql] +---- +SELECT SIN(-3); +---- + +The query returns: + +[source,sql] +---- ++----------------------+ +| f | ++----------------------+ +| -0.1411200080598672 | ++----------------------+ +---- + +=== Sine a fraction value + +This example shows the `SIN()` function with a fractional value as the argument: + +[source,sql] +---- +SELECT SIN(5.8732); +---- + +The query returns: + +[source,sql] +---- ++----------------------+ +| f | ++----------------------+ +| -0.3985959081271079 | ++----------------------+ +---- + +=== Sine with an expression + +The `SIN()` function can also include an expression: + +[source,sql] +---- +SELECT sin(8.5 * 2.3); +---- + +The query returns: + +[source,sql] +---- ++-----------------------+ +| f | ++-----------------------+ +| 0.6445566903363104 | ++-----------------------+ +---- + +=== Use the `SIN()` function with a table + +This example combines the `SIN()` function with a `CREATE TABLE` statement to obtain the sine values of a specific column: + +. Create a new table named *sineTable* containing the *initialValue* column. Input some values with the negative and positive angles into the column. 
++ +[source,sql] +---- +CREATE TABLE sineTable(initialValue int); +INSERT INTO sineTable(initialValue) +VALUES (-5),(18), (0),(-27); +---- + +. Run this query to get the sine value output: ++ +[source,sql] +---- +SELECT * ,SIN(initialValue) AS sinValue FROM sineTable; +---- + +. The final result will have the *initialValue* column with the source value and the *sinValue* column with their calculated sine values. ++ +[source,sql] +---- ++---------------+-------------------------------+ +| initialvalue | sinvalue | ++---------------+-------------------------------+ +| -5 | 0.9589242746631385 | +| 18 | -0.7509872467716762 | +| 0 | 0 | +| -27 | -0.956375928404503 | ++---------------+-------------------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/sinh.adoc b/modules/reference/pages/sql/sql-functions/math-functions/sinh.adoc new file mode 100644 index 000000000..d1c2fed27 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/sinh.adoc @@ -0,0 +1,82 @@ += SINH +:description: The SINH() function returns the hyperbolic sine of a specified numeric argument. +:page-topic-type: reference + +The `SINH()` function returns the hyperbolic sine of a specified numeric argument. + +== Syntax + +[source,sql] +---- +SINH(x) +---- + +== Arguments + +* `x`: A positive or negative number of type `real` or `double precision`. + +== Return type + +Same as the input type (`real` or `double precision`). 
+ +== Examples + +=== Positive value + +[source,sql] +---- +SELECT SINH(1); +---- + +[source,sql] +---- + sinh +-------------------- + 1.1752011936438014 +(1 row) +---- + +=== Negative value + +[source,sql] +---- +SELECT SINH(-1); +---- + +[source,sql] +---- + sinh +--------------------- + -1.1752011936438014 +(1 row) +---- + +=== Zero + +[source,sql] +---- +SELECT SINH(0); +---- + +[source,sql] +---- + sinh +------ + 0 +(1 row) +---- + +=== Expression argument + +[source,sql] +---- +SELECT SINH(2.5 * 0.4); +---- + +[source,sql] +---- + sinh +-------------------- + 1.1752011936438014 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/sqrt.adoc b/modules/reference/pages/sql/sql-functions/math-functions/sqrt.adoc new file mode 100644 index 000000000..5a047ba11 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/sqrt.adoc @@ -0,0 +1,118 @@ += SQRT +:description: The SQRT() function returns the square root of a given positive number. +:page-topic-type: reference + +The `SQRT()` function returns the square root of a given positive number. + +== Syntax + +The syntax for the `SQRT()` function in Redpanda SQL is: + +[source,sql] +---- +SQRT(x) +---- + +The `SQRT()` function requires one argument: + +* `x`: A positive number or an expression that evaluates to a positive number. 
+ +== Examples + +=== SQRT() a positive value + +This example demonstrates how to find the square root of a positive integer with `SQRT()`: + +[source,sql] +---- +SELECT SQRT(81); +---- + +The returned result: + +[source,sql] +---- ++-----+ +| f | ++-----+ +| 9 | ++-----+ +---- + +=== SQRT() with an expression + +This example shows how to use the `SQRT()` function to find the square root of the result of an expression: + +[source,sql] +---- +SELECT SQRT(60 + 4); +---- + +The result of this statement is the square root of 64: + +[source,sql] +---- ++-----+ +| f | ++-----+ +| 8 | ++-----+ +---- + +=== SQRT() with double precision result + +In addition to integers, Redpanda SQL also supports calculating square roots with floating-point numbers as the outcome, as shown in this example: + +[source,sql] +---- +SELECT SQRT(70); +---- + +The output of this statement is 8.3666, which is the square root of 70 with double precision: + +[source,sql] +---- ++----------+ +| f | ++----------+ +| 8.3666 | ++----------+ +---- + +=== SQRT() a negative number + +This example demonstrates that using the `SQRT()` function with a negative value returns `NaN` instead of raising an error: + +[source,sql] +---- +SELECT SQRT(-25); +---- + +As the `SQRT()` function only accepts positive numbers, it returns a *_NaN (Not a Number)_* result for the square root of -25: + +[source,sql] +---- ++-------+ +| f | ++-------+ +| NaN | ++-------+ +---- + +=== SQRT operator (`|/(x)`) + +This example uses the SQRT operator (`|/(x)`) to calculate the square root of a number: + +[source,sql] +---- +SELECT |/(169) AS sqrt_operator; +---- + +This example calculates the square root of 169 using the SQRT operator.
The result of this query will be: + +[source,sql] +---- + sqrt_operator +--------------- + 13 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/to-char-from-number.adoc b/modules/reference/pages/sql/sql-functions/math-functions/to-char-from-number.adoc new file mode 100644 index 000000000..0e385e94f --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/to-char-from-number.adoc @@ -0,0 +1,136 @@ += TO_CHAR from Number +:description: The TO_CHAR function formats a number into a string using a given format. +:page-topic-type: reference + +The `TO_CHAR` function formats a number into a string using a given format. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +TO_CHAR(value, format_string) +---- + +Parameters in the syntax include: + +* `value`: Number to format as a string. +* `format_string`: The pattern that defines the format of the output string. + +== Format + +The format string supports these template patterns (case-insensitive): + +[width="100%",cols="16%,84%",options="header",] +|=== +|*Pattern* |*Description* +|`9` |Digit position (may be dropped if insignificant) +|`0` |Digit position (never dropped) +|`.` |Decimal point +|`,` |Group (thousands) separator +|`D` |Decimal point +|`G` |Group separator +|`S` |Plus/minus sign directly before or after a number +|`PL` |Plus sign in the specified position (for positive numbers) +|`MI` |Minus sign in the specified position (for negative numbers) +|`SG` |Plus/minus sign in the specified position. +|=== + +=== Limitations + +* All text inside double quote `"\{text}"` will not be considered a pattern. +* The quote character `""` will not appear in the result string. +* Any text that does not match any pattern is preserved in the result string. + +== Examples + +=== Format with leading zeros + +The query formats 123.456 with leading zeros using the pattern '`00000.00000`'.
+ +[source,sql] +---- +SELECT TO_CHAR(123.456, '00000.00000'); +---- + +The query returns: + +[source,sql] +---- + to_char +-------------- + 00123.45600 +---- + +=== Format with variable length + +The query formats the number 123.456 with a variable-length pattern '`99999.99999`'. + +[source,sql] +---- +SELECT TO_CHAR(123.456, '99999.99999'); +---- + +The query returns: + +[source,sql] +---- + to_char +-------------- + 123.45600 +---- + +=== Format with group + +The query formats the number 123456 with grouping separators using the pattern '`9,999,999,999`'. + +[source,sql] +---- +SELECT TO_CHAR(123456, '9,999,999,999'); +---- + +The query returns: + +[source,sql] +---- + to_char +---------------- + 123,456 +---- + +=== Format with negative number + +The query formats the number -123 with a custom pattern including the sign. + +[source,sql] +---- +SELECT TO_CHAR(-123, '"Number formatted with pattern:000S":{000S}'); +---- + +The output shows the custom-formatted number. + +[source,sql] +---- + to_char +------------------------------------------- + Number formatted with pattern:000S:{123-} +---- + +=== Format with sign + +The query formats the number -123.456 with a custom pattern that outputs the sign, the integer part, and the fractional part separately. + +[source,sql] +---- +SELECT TO_CHAR(-123.456, '"Sign is: "SG" integer part is: "999", mantissa part is: ".999'); +---- + +The query returns: + +[source,sql] +---- + to_char +--------------------------------------------------------- + Sign is: - integer part is: 123, mantissa part is: .456 +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/coalesce.adoc b/modules/reference/pages/sql/sql-functions/other-functions/coalesce.adoc new file mode 100644 index 000000000..3c9bbe870 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/coalesce.adoc @@ -0,0 +1,191 @@ += coalesce() +:description: The COALESCE() function returns the first non-NULL argument from a list of arguments.
+:page-topic-type: reference + +The `COALESCE()` function returns the first non-NULL argument from a list of arguments. After finding the first non-NULL argument, the function stops evaluating the remaining arguments. + +[NOTE] +==== +If all arguments are NULL, `COALESCE()` returns NULL. +==== + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +COALESCE (argument_1, argument_2, …); +---- + +Key points from the syntax: + +* `COALESCE()` requires a minimum of two inputs. +* It can take an unlimited number of arguments. +* Evaluation occurs sequentially from left to right, stopping at the first non-null value. + +== Examples + +Here are some examples to illustrate the application of `COALESCE()`: + +=== Return the first non-null value + +In this example, a set of values is provided. The `COALESCE()` function returns the first non-null value from this set. + +[source,sql] +---- +SELECT COALESCE(9, 3, 8, 7, 1); +---- + +The result will be `9`, the first value without NULL among the provided options. + +[source,sql] +---- + coalesce +---------- + 9 +---- + +=== Handle NULL value as the last argument + +Include NULL as the final argument and check the query output. + +[source,sql] +---- +Select COALESCE(3,4,5,9,10,NULL); +---- + +The function output is `3` because it returns the first non-null value. + +[source,sql] +---- + coalesce +---------- + 3 +---- + +=== Handle NULL value as the first argument + +Consider NULL as the first argument in this example: + +[source,sql] +---- +Select COALESCE(NULL,1,5,7,9,2); +---- + +The query returns `1`, as it is the first non-null value of the argument. + +[source,sql] +---- + coalesce +---------- + 1 +---- + +=== Handle multiple NULL values + +In this query, NULL appears in the first, second, fourth, and last positions: + +[source,sql] +---- +Select COALESCE(NULL, NULL ,3, NULL, 7,9,4,5, NULL); +---- + +The `COALESCE()` function ignores the first two NULLs and returns the first non-null value, `3`. 
It does not process the subsequent NULL values. + +[source,sql] +---- + coalesce +---------- + 3 +---- + +=== Handle all NULL values + +Assume that the given values are entirely composed of nulls. + +[source,sql] +---- +Select COALESCE(NULL, NULL ,NULL, NULL); +---- + +In this case, the `COALESCE()` function returns an empty value (NULL). + +[source,sql] +---- + coalesce +---------- +---- + +=== `COALESCE()` with table data + +Consider the `employee_absent` table, which comprises a mix of NULL and non-null values: + +[source,sql] +---- +CREATE TABLE employee_absent ( + emp_name TEXT, + emp_dept TEXT, + absent TEXT +); + +INSERT INTO employee_absent (emp_name, emp_dept, absent) +VALUES + ('Alice', 'Finance', 'absent'), + ('Bob', 'Operations', 'absent'), + ('Carol', 'Finance', 'absent'), + ('David', 'HR', NULL), + ('Emily', 'HR', NULL); +---- + +Use the `SELECT` statement to display all the records: + +[source,sql] +---- +SELECT * FROM employee_absent; +---- + +[source,sql] +---- + emp_name | emp_dept | absent +----------+------------+-------- + Alice | Finance | absent + Bob | Operations | absent + Carol | Finance | absent + David | HR | + Emily | HR | +---- + +The following query uses the `COALESCE()` function on the `absent` column. It retrieves names and absences (with `out of office` for NULL values) for each employee. + +[source,sql] +---- +SELECT emp_name, COALESCE(absent, 'out of office') AS DisplayAbsent FROM employee_absent; +---- + +[source,sql] +---- + emp_name | displayabsent +----------+--------------- + Alice | absent + Bob | absent + Carol | absent + David | out of office + Emily | out of office +---- + +=== Error output in `COALESCE()` + +When specifying arguments with different datatypes, they should be convertible. + +[source,sql] +---- +Select Coalesce ('x',NULL,1); +---- + +If the datatypes cannot be converted, the `COALESCE()` function generates an error. 
+ +[source,sql] +---- +ERROR: invalid input syntax for type integer: "x" +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/col-description.adoc b/modules/reference/pages/sql/sql-functions/other-functions/col-description.adoc new file mode 100644 index 000000000..ea518193c --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/col-description.adoc @@ -0,0 +1,27 @@ += col_description() +:description: The col_description() function retrieves the comment associated with a specified table column. +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-COMMENT[col_description()^] is a comment information function that retrieves the comment associated with a specified table column. + +== Syntax + +The syntax for this function is: + +.... +col_description (table_oid, column_number) → NULL +.... + +== Parameters + +* link:https://www.postgresql.org/docs/current/datatype-oid.html[table_oid^]: Specifies the object identifier (OID) of the table containing the column from which to retrieve the comment. +* link:https://www.postgresql.org/docs/current/datatype-numeric.html#DATATYPE-INT[column_number^]: Indicates the ordinal position of the column within the table (starting from 1 for the first column). + +[NOTE] +==== +The table must be provided as an object identifier (OID), which you can obtain by casting the table name to `regclass`. +==== + +== Restrictions + +* This function always returns `NULL` if there are no parameters specified. 
diff --git a/modules/reference/pages/sql/sql-functions/other-functions/current-database.adoc b/modules/reference/pages/sql/sql-functions/other-functions/current-database.adoc new file mode 100644 index 000000000..d03d11c75 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/current-database.adoc @@ -0,0 +1,34 @@ += current_database() +:description: The current_database() function returns the name of the current database. +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-SESSION[current_database()^] is a session information function that returns the current database's name. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +SELECT current_database(); +---- + +== Examples + +This example shows how to obtain the name of the currently connected database: + +[source,sql] +---- +SELECT current_database(); +---- + +The query returns the output: + +[source,sql] +---- ++------------+ +| f          | ++------------+ +| Oxla       | ++------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/current-schema.adoc b/modules/reference/pages/sql/sql-functions/other-functions/current-schema.adoc new file mode 100644 index 000000000..7d5a152dd --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/current-schema.adoc @@ -0,0 +1,45 @@ += current_schema() +:description: The current_schema() function returns the name of the first existing schema in the search path. +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-SESSION[current_schema()^] is a session information function that returns the name of the first existing schema in the search path. 
+ +== Syntax + +There are two available syntax versions of `current_schema()` function: + +.Version 1 +[source,sql] +---- +SELECT current_schema(); +---- + +.Version 2 +[source,sql] +---- +SELECT current_schema; +---- +[NOTE] +==== +Returns `NULL` if none of the schemas from `search_path` exist +==== + +== Examples + +This example shows how to get the current schema name using this function: + +[source,sql] +---- +SELECT current_schema(); +---- + +Example output: + +[source,sql] +---- ++------------+ +| f | ++------------+ +| public | ++------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/generate-series.adoc b/modules/reference/pages/sql/sql-functions/other-functions/generate-series.adoc new file mode 100644 index 000000000..b89735a55 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/generate-series.adoc @@ -0,0 +1,115 @@ += GENERATE_SERIES +:description: The GENERATE_SERIES function generates a set of values from start to stop with an optional step increment. +:page-topic-type: reference + +The `GENERATE_SERIES` function generates a set of values from a start value to a stop value with an optional step increment. Use it as a table function in the `FROM` clause. + +== Syntax + +[source,sql] +---- +GENERATE_SERIES(start, stop) +GENERATE_SERIES(start, stop, step) +---- + +== Arguments + +* `start`: The first value in the series. Type: `BIGINT`. +* `stop`: The last value in the series (inclusive). Type: `BIGINT`. +* `step`: Optional. The increment between values. Defaults to `1`. Use a negative value to generate a descending series. Type: `BIGINT`. + +If `step` is positive and `start` is greater than `stop`, an empty set is returned. If `step` is negative and `start` is less than `stop`, an empty set is returned. 
+ +== Examples + +=== Generate an ascending series + +[source,sql] +---- +SELECT * FROM GENERATE_SERIES(1, 5); +---- + +[source,sql] +---- + generate_series +----------------- + 1 + 2 + 3 + 4 + 5 +(5 rows) +---- + +=== Generate a series with a custom step + +[source,sql] +---- +SELECT * FROM GENERATE_SERIES(0, 10, 2); +---- + +[source,sql] +---- + generate_series +----------------- + 0 + 2 + 4 + 6 + 8 + 10 +(6 rows) +---- + +=== Generate a descending series + +[source,sql] +---- +SELECT * FROM GENERATE_SERIES(5, 1, -1); +---- + +[source,sql] +---- + generate_series +----------------- + 5 + 4 + 3 + 2 + 1 +(5 rows) +---- + +=== Filter a series with WHERE + +[source,sql] +---- +SELECT * FROM GENERATE_SERIES(1, 10) WHERE generate_series % 2 = 0; +---- + +[source,sql] +---- + generate_series +----------------- + 2 + 4 + 6 + 8 + 10 +(5 rows) +---- + +=== Aggregate a series + +[source,sql] +---- +SELECT SUM(generate_series) FROM GENERATE_SERIES(1, 100); +---- + +[source,sql] +---- + sum +------ + 5050 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/has-schema-privilege.adoc b/modules/reference/pages/sql/sql-functions/other-functions/has-schema-privilege.adoc new file mode 100644 index 000000000..6f1e18e13 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/has-schema-privilege.adoc @@ -0,0 +1,70 @@ += has_schema_privilege() +:description: The has_schema_privilege() function checks whether the current user has specific privileges on a schema. +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-ACCESS[`has_schema_privilege()`^] function is an access privilege inquiry function that checks whether the current user has specific privileges on a schema. 
+ +== Syntax + +The `has_schema_privilege` function has two available syntax versions: + +[source,sql] +---- +SELECT has_schema_privilege('user', 'schema', 'privilege'); +---- + +[source,sql] +---- +SELECT has_schema_privilege('schema', 'privilege'); +---- + +No matter which syntax version you choose, the `has_schema_privilege()` function always returns `TRUE (t)`. + +== Parameters + +* `schema`: Name of the schema to check privileges for (can be any string value or string columns from other tables). +* `user`: Name of the user whose privileges to check (can be any string value). +* `privilege`: Privilege to check for on the schema. The function currently supports `create` and `usage`. + +[NOTE] +==== +The comparison for the `privilege` is case-insensitive, so you can use lowercase or uppercase notation for the privilege name. +==== + +== Examples + +=== Check for CREATE privilege + +This example uses the `has_schema_privilege()` function to determine whether the current user has the `create` privilege on a schema named `public`: + +[source,sql] +---- +SELECT has_schema_privilege('public', 'create'); +---- + +The query returns `TRUE`, which means that the current user has the `create` privilege on the `public` schema. + +[source,sql] +---- + has_schema_privilege +---------------------- + t +---- + +=== Check for USAGE privilege + +Use the `has_schema_privilege()` function to check for the `usage` privilege on a schema. For example, to check whether the user `cahyo` has the `usage` privilege on the `public` schema, run: + +[source,sql] +---- +SELECT has_schema_privilege('cahyo', 'public', 'USAGE'); +---- + +The query returns `TRUE`, which means the user `cahyo` has the `usage` privilege on the `public` schema. 
+ +[source,sql] +---- + has_schema_privilege +---------------------- + t +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/index.adoc b/modules/reference/pages/sql/sql-functions/other-functions/index.adoc new file mode 100644 index 000000000..663c440cd --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/index.adoc @@ -0,0 +1,31 @@ += Other functions +:description: Reference for other Redpanda SQL functions, including coalesce, nullif, and PostgreSQL system information functions. +:page-topic-type: reference + +Besides xref:reference:sql/sql-functions/math-functions/index.adoc[math], xref:reference:sql/sql-functions/aggregate-functions/index.adoc[aggregate], xref:reference:sql/sql-functions/window-functions/index.adoc[window], xref:reference:sql/sql-functions/string-functions/index.adoc[string], xref:reference:sql/sql-functions/timestamp-functions/index.adoc[timestamp], and xref:reference:sql/sql-functions/json-functions/index.adoc[JSON] functions, Redpanda SQL also supports these functions: + +[cols="46%,54%",options="header",] +|=== +|Function |Description +|xref:reference:sql/sql-functions/other-functions/coalesce.adoc[coalesce()] |Returns the first argument that is not NULL, while the remaining arguments from the first non-null argument are not evaluated. +|xref:reference:sql/sql-functions/other-functions/current-database.adoc[current_database()] |Returns the current database's name. +|xref:reference:sql/sql-functions/other-functions/current-schema.adoc[current_schema()] |Returns the schema's name (first in the search path). +|xref:reference:sql/sql-functions/other-functions/has-schema-privilege.adoc[has_schema_privilege()] |Checks whether the current user has specific privileges on a schema. +|xref:reference:sql/sql-functions/other-functions/nullif.adoc[nullif()] |Replaces a given value with NULL if it matches a specific criterion. 
+|xref:reference:sql/sql-functions/other-functions/pg-get-expr.adoc[pg_get_expr()] |Retrieves the internal form of an individual expression (such as the default value for a column). +|xref:reference:sql/sql-functions/other-functions/pg-total-relation-size.adoc[pg_total_relation_size()] |Retrieves the size of a table. +|xref:reference:sql/sql-functions/other-functions/pg-typeof.adoc[pg_typeof()] |Retrieves the data type of any given value. +|xref:reference:sql/sql-functions/other-functions/pg-encoding-to-char.adoc[pg_encoding_to_char()] |Converts an encoding internal identifier to a human-readable name. +|xref:reference:sql/sql-functions/other-functions/pg-get-indexdef.adoc[pg_get_indexdef()] |Reconstructs the PostgreSQL command used to retrieve the definition of a specified index. +|xref:reference:sql/sql-functions/other-functions/pg-get-userbyid.adoc[pg_get_userbyid()] |Retrieves the name of a user (role) given its unique identifier (OID). +|xref:reference:sql/sql-functions/other-functions/pg-relation-is-publishable.adoc[pg_relation_is_publishable()] |Determines whether a specified relation (table) can be published in a publication. +|xref:reference:sql/sql-functions/other-functions/pg-size-pretty.adoc[pg_size_pretty()] |Converts sizes in bytes into a human-readable format. +|xref:reference:sql/sql-functions/other-functions/pg-table-size.adoc[pg_table_size()] |Retrieves the size of a specific table, including its associated storage components but excluding indexes. +|xref:reference:sql/sql-functions/other-functions/pg-table-is-visible.adoc[pg_table_is_visible()] |Checks whether a specified table (or other database object) is visible in the current schema search path. +|xref:reference:sql/sql-functions/other-functions/pg-get-constraintdef.adoc[pg_get_constraintdef()] |Retrieves the definition of a specific constraint in a human-readable format. 
+|xref:reference:sql/sql-functions/other-functions/pg-get-statisticsobjdef-columns.adoc[pg_get_statisticsobjdef_columns()] |Retrieves the definitions of columns associated with a specified statistics object. +|xref:reference:sql/sql-functions/other-functions/obj-description.adoc[obj_description()] |Returns the comment associated with a specific database object. +|xref:reference:sql/sql-functions/other-functions/col-description.adoc[col_description()] |Retrieves the comment associated with a specified table column, identified by its column number. +|xref:reference:sql/sql-functions/other-functions/shobj-description.adoc[shobj_description()] |Retrieves the comment associated with a shared database object. +|xref:reference:sql/sql-functions/other-functions/pg-backend-pid.adoc[pg_backend_pid()] |Returns the process ID (PID) of the server process handling the current session. +|=== diff --git a/modules/reference/pages/sql/sql-functions/other-functions/nullif.adoc b/modules/reference/pages/sql/sql-functions/other-functions/nullif.adoc new file mode 100644 index 000000000..d7b160cc7 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/nullif.adoc @@ -0,0 +1,194 @@ += nullif() +:description: The NULLIF() function returns NULL if its two arguments are equal; otherwise, it returns the first argument. +:page-topic-type: reference + +The `NULLIF()` function compares two arguments and returns NULL if they are equal. Otherwise, it returns the first argument. + +== Syntax + +[source,sql] +---- +NULLIF(argument_1,argument_2); +---- + +The `NULLIF` function takes two arguments: + +* The first argument is the value to evaluate. +* The second argument is the value to treat as NULL if the first argument matches it. + +[TIP] +==== +If the first argument matches the second argument, the `NULLIF()` function returns `NULL`. Otherwise, it returns the first argument as-is. 
+==== + +== Examples + +=== Handle equal values + +In this case, the `NULLIF` function compares the values 4 and 4. 
+ +[source,sql] +---- +SELECT NULLIF (4, 4); +---- + +The result is `NULL` because the two values being compared are equal (4 = 4). + +[source,sql] +----- + if +---- +----- + +=== Handle different values + +This example uses the `NULLIF` function to manage different values. + +[source,sql] +---- +SELECT NULLIF (9, 0); +---- + +The result is `9` because the second value in the `NULLIF` function is 0 (the two values are not equal). + +[source,sql] +---- + if + + 9 +---- + +=== String comparison + +In this case, the `NULLIF` function compares the strings `L` and `O`. + +[source,sql] +---- +SELECT NULLIF ('L', 'O'); +---- + +The result is `L` because the two strings being compared (`L` and `O`) are not equal. Therefore, the function returns the first string. + +[source,sql] +---- + if + + L +---- + +=== Handle default values + +Suppose there is an `employees` table with columns for `name` and `salary`. This query retrieves employee names and their adjusted salaries, where a salary of 0 is replaced with NULL: + +[source,sql] +---- +CREATE TABLE employees ( + name TEXT, + salary INT +); + +INSERT INTO employees (name, salary) +VALUES + ('John', 50000), + ('Jane', 0), + ('Roy', 0), + ('NEil', 0), + ('Michael', 75000); +---- + +View the records: + +[source,sql] +---- +SELECT * FROM employees; +---- + +[source,sql] +---- + name | salary +---------+-------- + John | 50000 + Jane | 0 + Roy | 0 + NEil | 0 + Michael | 75000 +---- + +This query retrieves employee names and their adjusted salaries, where a salary of 0 is replaced with NULL: + +[source,sql] +---- +SELECT name, NULLIF(salary, 0) AS adjusted_salary +FROM employees; +---- + +The `NULLIF` function checks whether the `salary` value is 0. If it is, the function returns NULL. Otherwise, it returns the original `salary` value. 
+ +[source,sql] +---- + name | adjusted_salary +---------+----------------- + John | 50000 + Jane | + Roy | + NEil | + Michael | 75000 +---- + +=== Avoid division by zero + +Suppose there is a `fractions` table with columns, a `numerator` and a `denominator`. + +[source,sql] +---- +CREATE TABLE fractions ( + numerator INT, + denominator INT +); + +INSERT INTO fractions (numerator, denominator) +VALUES + (10, 2), + (20, 0), + (15, 3), + (75, 0), + (15, 3); +---- + +View the table: + +[source,sql] +---- +SELECT * FROM fractions; +---- + +[source,sql] +---- + numerator | denominator +-----------+------------- + 10 | 2 + 20 | 0 + 15 | 3 + 75 | 0 + 15 | 3 +---- + +Here, the `NULLIF` function is applied to the `denominator` column. If the `denominator` is 0, the function returns NULL, avoiding division by zero. + +[source,sql] +---- +SELECT numerator, denominator, numerator / NULLIF(denominator, 0) AS "result" FROM fractions; +---- + +The result is shown in the result column. + +[source,sql] +---- + numerator | denominator | result +-----------+-------------+-------- + 10 | 2 | 5 + 20 | 0 | + 15 | 3 | 5 + 75 | 0 | + 15 | 3 | 5 +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/obj-description.adoc b/modules/reference/pages/sql/sql-functions/other-functions/obj-description.adoc new file mode 100644 index 000000000..77bb441f1 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/obj-description.adoc @@ -0,0 +1,21 @@ += obj_description() +:description: The obj_description() function returns the comment associated with a specific database object. +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-COMMENT[`obj_description()`^] is a comment information function that returns the comment associated with a specific database object. 
+ +== Syntax + +[source,text] +---- +obj_description (object_oid, catalog_name) → NULL +---- + +== Parameters + +* link:https://www.postgresql.org/docs/current/datatype-oid.html[`object_oid`^]: Specifies the object identifier (OID) of the database object to retrieve the comment for. +* link:https://www.postgresql.org/docs/current/catalogs.html[`catalog_name`^]: Specifies the name of the system catalog that contains the object. + +== Restrictions + +* This function always returns `NULL` if there are no parameters specified. diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-backend-pid.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-backend-pid.adoc new file mode 100644 index 000000000..3f3983c7b --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-backend-pid.adoc @@ -0,0 +1,12 @@ += pg_backend_pid() +:description: The pg_backend_pid() function returns the process ID (PID) of the server process handling the current session. +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-SESSION[`pg_backend_pid()`^] is a session information function that returns the process ID (PID) of the server process handling the current session. It is useful for identifying the backend process associated with a specific database connection, allowing for monitoring and task management. + +== Syntax + +[source,sql] +---- +pg_backend_pid() +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-encoding-to-char.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-encoding-to-char.adoc new file mode 100644 index 000000000..0c7b79879 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-encoding-to-char.adoc @@ -0,0 +1,48 @@ += pg_encoding_to_char() +:description: The pg_encoding_to_char() function converts an encoding internal identifier to a human-readable name. 
+:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[`pg_encoding_to_char()`^] is a system catalog information function that converts an encoding internal identifier to a human-readable name. + +== Syntax + +[source,sql] +---- +pg_encoding_to_char(number) +---- + +== Parameters + +* `number`: Specifies the integer value representing the encoding identifier. + +== Examples + +[source,sql] +---- +SELECT pg_encoding_to_char(1); + + pg_encoding_to_char +--------------------- + EUC_JP +(1 row) +---- + +[source,sql] +---- +SELECT pg_encoding_to_char(0); + + pg_encoding_to_char +--------------------- + SQL_ASCII +(1 row) +---- + +[source,sql] +---- +SELECT pg_encoding_to_char(-1); + + pg_encoding_to_char +--------------------- + +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-get-constraintdef.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-constraintdef.adoc new file mode 100644 index 000000000..38c96b973 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-constraintdef.adoc @@ -0,0 +1,21 @@ += pg_get_constraintdef() +:description: The pg_get_constraintdef() function retrieves the definition of a specific constraint in a human-readable format. +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[`pg_get_constraintdef()`^] is a system catalog information function that retrieves the definition of a specific constraint in a human-readable format. + +== Syntax + +[source,text] +---- +pg_get_constraintdef (constraint_oid [, pretty_bool]) → NULL +---- + +== Parameters + +* link:https://www.postgresql.org/docs/current/catalog-pg-constraint.html[`constraint_oid`^]: Specifies the object identifier (OID) of the constraint to retrieve the definition for. 
+* link:https://www.postgresql.org/docs/current/datatype-boolean.html[`pretty_bool`^]: Controls whether to format the output in a human-readable way. + +== Restrictions + +* This function always returns `NULL` if there are no parameters specified. diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-get-expr.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-expr.adoc new file mode 100644 index 000000000..553dca1b7 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-expr.adoc @@ -0,0 +1,73 @@ += pg_get_expr() +:description: The pg_get_expr() function retrieves the internal form of an individual expression, such as the default value for a column. +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[`pg_get_expr()`^] is a system catalog information function that retrieves the internal form of an individual expression, such as the default value for a column. + +== Syntax + +The `pg_get_expr()` function has two available syntax versions: + +[source,sql] +---- +SELECT pg_get_expr('expr_text', relation_oid); +---- + +[source,sql] +---- +SELECT pg_get_expr('expr_text', relation_oid, pretty_bool); +---- + +Both versions of the `pg_get_expr()` function return an empty string `""`. + +== Parameters + +* `expr_text`: Expression to obtain the internal representation for (can be any string value). +* `relation_oid`: OID (object identifier) of the table the expression belongs to (integer type). +* `pretty_bool`: Boolean value determining whether to format the expression in a more human-readable format (`TRUE`) or not (`FALSE`). 
+ +== Examples + +First, create a sample table named *employees*: + +[source,sql] +---- +CREATE TABLE employees ( + id INT, + name TEXT, + salary TEXT +); +---- + +Then get the OID of the table: + +[source,sql] +---- +SELECT oid FROM pg_class WHERE relname = 'employees'; +---- + +[source,sql] +---- + oid +------ + 1018 +---- + +Retrieve the internal form for the `salary` column using the `pg_get_expr()` function: + +[source,sql] +---- +-- Version 1 +SELECT pg_get_expr('salary', 1018); + +-- Version 2 +SELECT pg_get_expr('salary', 1018, TRUE); +---- + +Either query returns: + +[source,sql] +---- + pg_get_expr +------------- +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-get-indexdef.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-indexdef.adoc new file mode 100644 index 000000000..70f8ba3c5 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-indexdef.adoc @@ -0,0 +1,68 @@ += pg_get_indexdef() +:description: The pg_get_indexdef() function reconstructs the PostgreSQL command used to retrieve the definition of a specified index. +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[`pg_get_indexdef()`^] is a system catalog information function that reconstructs the PostgreSQL command used to retrieve the definition of a specified index. + +== Syntax + +The `pg_get_indexdef()` function has two available syntax versions: + +[source,sql] +---- +pg_get_indexdef(index_oid, column_oid) +---- + +[source,sql] +---- +pg_get_indexdef(index_oid, column_oid, pretty_bool) +---- + +== Parameters + +The parameters required to execute this function: + +* `index_oid`: Specifies the object identifier (OID) of the index. +* `column_oid`: Indicates the column number within the index (starting from 1). +* `pretty_bool`: Controls whether to format the output in a human-readable way. 
+ +== Examples + +Create a sample table and an index for it: + +[source,sql] +---- +CREATE TABLE sample_table(col int); +CREATE INDEX sample_index ON sample_table(col); +---- + +Once that is done, get the OID of the index: + +[source,sql] +---- +SELECT oid FROM pg_class WHERE relname = 'sample_index'; +---- + +[source,sql] +---- + oid +------ + 16387 +---- + +Retrieve the index definition: + +[source,sql] +---- +SELECT pg_get_indexdef(16387); +---- + +The query returns the reconstructed definition: + +[source,sql] +---- + pg_get_indexdef +------------------------------------------------------- + CREATE INDEX sample_index ON public.sample_table(col) +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-get-statisticsobjdef-columns.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-statisticsobjdef-columns.adoc new file mode 100644 index 000000000..92f3be8d7 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-statisticsobjdef-columns.adoc @@ -0,0 +1,16 @@ += pg_get_statisticsobjdef_columns() +:description: The pg_get_statisticsobjdef_columns() function retrieves information about the columns associated with an extended statistics object. +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[`pg_get_statisticsobjdef_columns()`^] is a system catalog information function that retrieves information about the columns associated with an extended statistics object. + +== Syntax + +[source,text] +---- +pg_get_statisticsobjdef_columns() → NULL +---- + +== Restrictions + +* This function always returns `NULL` if there are no parameters specified. 
diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-get-userbyid.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-userbyid.adoc new file mode 100644 index 000000000..992b0638b --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-userbyid.adoc @@ -0,0 +1,53 @@ += pg_get_userbyid() +:description: The pg_get_userbyid() function retrieves the name of a user (role) given its unique identifier (OID). +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[`pg_get_userbyid()`^] is a system catalog information function that retrieves the name of a user (role) given its unique identifier (OID). + +== Syntax + +[source,sql] +---- +pg_get_userbyid(role_oid) +---- + +== Parameters + +* `role_oid`: Specifies the object identifier (OID) of the user. + +== Examples + +Get the OIDs of all the users: + +[source,sql] +---- +SELECT id,name FROM oxla_internal.oxla_role; +---- + +The query returns the list of users with their IDs (OIDs): + +[source,sql] +---- + id | name +----+--------- + 1 | oxla + 2 | other_user +(2 rows) +---- + +Translate the OID to a role name: + +[source,sql] +---- +SELECT pg_get_userbyid(2); +---- + +The query returns: + +[source,sql] +---- + pg_get_userbyid +----------------- + other_user +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-relation-is-publishable.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-relation-is-publishable.adoc new file mode 100644 index 000000000..6eb8dc11e --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-relation-is-publishable.adoc @@ -0,0 +1,36 @@ += pg_relation_is_publishable() +:description: Use the pg_relation_is_publishable() function to determine whether a specified relation (table) can be published in a publication. 
+:page-topic-type: reference + +Use the `pg_relation_is_publishable()` function to determine whether a specified relation (table) can be published in a link:https://www.postgresql.org/docs/current/logical-replication-publication.html[publication^]. + +== Syntax + +[source,sql] +---- +pg_relation_is_publishable(table_name_or_oid) +---- + +The function returns `false` for every existing table and `NULL` for any non-existing table. + +== Parameters + +* `table_name_or_oid`: Specifies the name or object identifier (OID) of the table. + +== Examples + +[source,sql] +---- +SELECT pg_relation_is_publishable('existing_table'); + pg_relation_is_publishable +---------------------------- + f +---- + +[source,sql] +---- +SELECT pg_relation_is_publishable(16386); + pg_relation_is_publishable +---------------------------- + f +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-size-pretty.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-size-pretty.adoc new file mode 100644 index 000000000..2262794f8 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-size-pretty.adoc @@ -0,0 +1,36 @@ += pg_size_pretty() +:description: The pg_size_pretty() function converts sizes in bytes into a human-readable format. +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-admin.html[`pg_size_pretty()`^] is a database object management function that converts sizes in bytes into a human-readable format. + +== Syntax + +[source,sql] +---- +pg_size_pretty(size) +---- + +== Parameters + +* `size`: Specifies the size in bytes to convert. 
+ +== Examples + +[source,sql] +---- +SELECT pg_size_pretty(100); + pg_size_pretty +---------------- + 100 bytes +(1 row) +---- + +[source,sql] +---- +SELECT pg_size_pretty(1000000); + pg_size_pretty +---------------- + 977 kB +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-table-is-visible.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-table-is-visible.adoc new file mode 100644 index 000000000..15503ef3d --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-table-is-visible.adoc @@ -0,0 +1,41 @@ += pg_table_is_visible() +:description: The pg_table_is_visible() function checks whether a specified table or other database object is visible in the current schema search path. +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-admin.html[`pg_table_is_visible()`^] is a schema visibility inquiry function that checks whether a specified table or other database object is visible in the current schema search path. + +== Syntax + +[source,sql] +---- +pg_table_is_visible(table_or_index_oid) +---- + +== Parameters + +* `table_or_index_oid`: Specifies the object identifier (OID) of a table or its name. 
+ +== Examples + +[source,sql] +---- +SELECT pg_table_is_visible(-1); + pg_table_is_visible +---------------------------- +---- + +[source,sql] +---- +SELECT pg_table_is_visible(16386); + pg_table_is_visible +---------------------------- + t +---- + +[source,sql] +---- +SELECT pg_table_is_visible(16381); + pg_table_is_visible +---------------------------- + f +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-table-size.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-table-size.adoc new file mode 100644 index 000000000..b1b2de662 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-table-size.adoc @@ -0,0 +1,16 @@ += pg_table_size() +:description: The pg_table_size() function retrieves the size of a specific table, including its associated storage components but excluding indexes. +:page-topic-type: reference + +link:https://www.postgresql.org/docs/current/functions-admin.html[`pg_table_size()`^] is a system administration function that retrieves the size of a specific table, including its associated storage components but excluding indexes. + +== Syntax + +[source,sql] +---- +pg_table_size(regclass) +---- + +== Parameters + +* `regclass`: Name or object identifier (OID) of the table. diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-total-relation-size.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-total-relation-size.adoc new file mode 100644 index 000000000..cad5a6544 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-total-relation-size.adoc @@ -0,0 +1,55 @@ += pg_total_relation_size() +:description: The pg_total_relation_size() function retrieves the size of a table and is useful for monitoring storage requirements. 
+:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-admin.html[`pg_total_relation_size()`^] is a database object size function that retrieves the size of a table and is useful for monitoring the storage requirements. + +== Syntax + +[source,sql] +---- +pg_total_relation_size('relation_name'); +---- + +It returns the size of the specified table in bytes. + +== Parameters + +* `relation_name`: Name of the table to determine the size for. + +== Examples + +Create a `users` table: + +[source,sql] +---- +CREATE TABLE users ( + username TEXT, + email TEXT +); +INSERT INTO users (username, email) VALUES + ('john_doe', 'john.doe@example.com'), + ('jane_smith', 'jane.smith@example.com'), + ('alice_smith', 'alice.smith@example.com'), + ('bob_jones', 'bob.jones@example.com'), + ('susan_wilson', 'susan.wilson@example.com'), + ('michael_jackson', 'michael.jackson@example.com'), + ('lisa_johnson', 'lisa.johnson@example.com'), + ('david_smith', 'david.smith@example.com'); +---- + +Use the `pg_total_relation_size()` function to determine the size of the `users` table (in bytes): + +[source,sql] +---- +SELECT pg_total_relation_size('users'); +---- + +The query returns: + +[source,sql] +---- + pg_total_relation_size +------------------------ + 556 +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-typeof.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-typeof.adoc new file mode 100644 index 000000000..6fbac0532 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-typeof.adoc @@ -0,0 +1,104 @@ += pg_typeof() +:description: The pg_typeof() function retrieves the data type of any given value. +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[`pg_typeof()`^] is a system catalog information function that retrieves the data type of any given value. 
It returns a string literal corresponding to the expression type. + +== Syntax + +[source,sql] +---- +SELECT pg_typeof(`any`); +---- + +== Parameters + +* `any`: Represents any value used to determine the data type. + +== Examples + +=== Numeric + +This example shows the function usage with a numeric value: + +[source,sql] +---- +SELECT pg_typeof(100) as "data type"; +---- + +[source,sql] +---- + data type +----------- + integer +---- + +=== String + +This example uses a string value as an input: + +[source,sql] +---- +SELECT pg_typeof('event'::TEXT) as "data type"; +---- + +[source,sql] +---- + data type +----------- + text +---- + +=== Interval + +This example uses an interval input: + +[source,sql] +---- +SELECT pg_typeof(INTERVAL '1 day') as "data type"; +---- + +[source,sql] +---- + data type +----------- + interval +---- + +=== Table + +This section shows how to create a sample table and then uses `pg_typeof()` to retrieve the data types of information stored in the table: + +[source,sql] +---- +CREATE TABLE timestamp_example ( + id int, + event_time timestamp, + description text +); + +INSERT INTO timestamp_example (event_time, description) +VALUES + ('2023-10-20 12:30:00', 'Event 1'), + (NULL, 'Event 2'); +---- + +Use the `pg_typeof()` function to determine the data types of the `event_time` and `description` columns for each row: + +[source,sql] +---- +SELECT + pg_typeof(event_time) AS event_time_type, + pg_typeof(description) AS description_type +FROM timestamp_example; +---- + +The query returns: + +[source,sql] +---- + event_time_type | description_type +-----------------------------+------------------ + timestamp without time zone | text + timestamp without time zone | text +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/shobj-description.adoc b/modules/reference/pages/sql/sql-functions/other-functions/shobj-description.adoc new file mode 100644 index 000000000..a50fc93f6 --- /dev/null +++ 
b/modules/reference/pages/sql/sql-functions/other-functions/shobj-description.adoc @@ -0,0 +1,21 @@ += shobj_description() +:description: The shobj_description() function retrieves the comment associated with a shared database object. +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[`shobj_description()`^] is a comment information function that retrieves the comment associated with a shared database object. + +== Syntax + +[source,text] +---- +shobj_description (object_oid, catalog_name) → NULL +---- + +== Parameters + +* link:https://www.postgresql.org/docs/current/datatype-oid.html[`object_oid`^]: Specifies the object identifier (OID) of the shared object to retrieve the comment for. +* link:https://www.postgresql.org/docs/current/catalogs.html[`catalog_name`^]: Specifies the name of the system catalog that contains the shared object. + +== Restrictions + +* This function always returns `NULL` if no parameters are specified. diff --git a/modules/reference/pages/sql/sql-functions/string-functions/concat.adoc b/modules/reference/pages/sql/sql-functions/string-functions/concat.adoc new file mode 100644 index 000000000..5c0acbddf --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/concat.adoc @@ -0,0 +1,118 @@ += CONCAT +:description: Use the CONCAT() function to concatenate one or more input values into a single result. +:page-topic-type: reference + +Use the `CONCAT()` function to concatenate one or more input values into a single result. It supports all data types in Redpanda SQL, except `TIMESTAMPTZ`. The output is returned as a concatenation of the input values. + +*Special cases:* Returns `NULL` if there are no input rows or `NULL` values. 
+ +== Examples + +=== Basic `CONCAT()` function + +This example uses the `CONCAT()` function to concatenate three values into a single result: + +[source,sql] +---- +SELECT CONCAT ('Oxla', '.', 'com') AS "Website"; +---- + +The query returns: + +[source,sql] +---- ++------------+ +| Website | ++------------+ +| Oxla.com | ++------------+ +---- + +=== `CONCAT()` function using column + +This example uses a *payment* table that stores customer payment data: + +[source,sql] +---- +CREATE TABLE payment ( + paymentid int, + custFirstName text, + custLastName text, + product text, + ordertotal float +); +INSERT INTO payment + (paymentid, custFirstName, custLastName, product, ordertotal) +VALUES + (9557451,'Alex','Drue','Latte',2.10), + (9557421,'Lana','Rey','Latte',2.10), + (9557411,'Tom','Hanks','Americano',1.85), + (9557351,'Maya','Taylor','Cappuccino',2.45), + (9557321,'Smith','Jay','Cappuccino',2.45), + (9557311,'Will','Ritchie','Americano',1.85); +---- + +[source,sql] +---- +SELECT * FROM payment; +---- + +This query displays the table: + +[source,sql] +---- ++------------+----------------+----------------+--------------+---------------+ +| paymentid | custFirstName | custLastName | product | ordertotal | ++------------+----------------+----------------+--------------+---------------+ +| 9557451 | Alex | Drue | Latte | 2.10 | +| 9557421 | Lana | Rey | Latte | 2.10 | +| 9557411 | Tom | Hanks | Americano | 1.85 | +| 9557351 | Maya | Taylor | Cappuccino | 2.45 | +| 9557321 | Smith | Jay | Cappuccino | 2.45 | +| 9557311 | Will | Ritchie | Americano | 1.85 | ++------------+----------------+----------------+--------------+---------------+ +---- + +This query concatenates values in the `custFirstName` and `custLastName` columns of the *payment* table: + +[source,sql] +---- +SELECT CONCAT (custFirstName, ' ', custLastName) AS "Customer Name" +FROM payment; +---- + +This displays an output where spaces separate the first and last names. 
+ +[source,sql] +---- ++-----------------+ +| Customer Name | ++-----------------+ +| Tom Hanks | +| Lana Rey | +| Alex Drue | +| Will Ritchie | +| Smith Jay | +| Maya Taylor | ++-----------------+ +---- + +=== CONCAT() function with NULL + +This example shows how to use the `CONCAT()` function to concatenate a string with a `NULL` value: + +[source,sql] +---- +SELECT CONCAT('Talent Source ',NULL) AS "concat"; +---- + +The result shows that the `CONCAT` function will skip the `NULL` value: + +[source,sql] +---- ++------------------+ +| concat | ++------------------+ +| Talent Source | ++------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/ends-with.adoc b/modules/reference/pages/sql/sql-functions/string-functions/ends-with.adoc new file mode 100644 index 000000000..214bc46e9 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/ends-with.adoc @@ -0,0 +1,154 @@ += ENDS_WITH +:description: The ENDS_WITH() function determines whether the first argument ends with a specified string in the second argument or not. +:page-topic-type: reference + +The `ENDS_WITH()` function determines whether the first argument ends with a specified string in the second argument or not. + +[source,sql] +---- +ENDS_WITH(first_argument, 'second_argument') +---- + +* `first_argument`: The search reference. Can be a string or a column name. +* `second_argument`: The specified argument, which will have the search keywords. + +The input type will be `STRING`, and the return type is `BOOL`, shown as `true` or `false`. + +*Special case:* + +* Returns `NULL` for the `NULL` record. +* Returns `true` (including the `NULL` record) if the `second_argument` is not specified. 
+ +== Examples + +=== `ENDS_WITH()` function using column + +Consider a table named *courses*: + +[source,sql] +---- +CREATE TABLE courses ( + course_id int, + course_name text, + credits text +); +INSERT INTO courses + (course_id, course_name, credits) +VALUES + (2111,'Basics of Plant Biotechnology',2), + (2102,'Biochemistry',3), + (1241,'Statistics',3), + (4142,'Microbial Biodiversity',2), + (3262,'Introduction to Plant Pathology',3), + (3233,'Enzyme Technology',2), + (1201,'Rural Sociology',2); +---- + +[source,sql] +---- +SELECT * FROM courses; +---- + +The query displays the table: + +[source,sql] +---- ++------------+----------------------------------+-----------+ +| course_id | course_name | credits | ++------------+----------------------------------+-----------+ +| 2111 | Basics of Plant Biotechnology | 2 | +| 2102 | Biochemistry | 3 | +| 1241 | Statistics | 3 | +| 4142 | Microbial Biodiversity | 2 | +| 3262 | Introduction to Plant Pathology | 3 | +| 3233 | Enzyme Technology | 2 | +| 1201 | Rural Sociology | 2 | ++------------+----------------------------------+-----------+ +---- + +This query checks which values of the *course_name* column end with "`ology`" in the preceding table: + +[source,sql] +---- +SELECT course_name, ENDS_WITH(course_name, 'ology') FROM courses; +---- + +The query returns `true` for every course whose name ends with `ology`, and `false` otherwise. 
+ +[source,sql] +---- ++----------------------------------+-------------+ +| course_name | ends_with | ++----------------------------------+-------------+ +| Basics of Plant Biotechnology | true | +| Biochemistry | false | +| Statistics | false | +| Microbial Biodiversity | false | +| Introduction to Plant Pathology | true | +| Enzyme Technology | true | +| Rural Sociology | true | ++----------------------------------+-------------+ +---- + +=== `ENDS_WITH()` function with no specified argument + +The **patients_data** table has a `NULL` value in the *allergies* column: + +[source,sql] +---- +CREATE TABLE patients_data ( + record_number int, + patient_name text, + height_in_cm int, + weight_in_kg int, + allergies text +); +INSERT INTO patients_data + (record_number, patient_name, height_in_cm, weight_in_kg, allergies) +VALUES + (2009000908,'Vivienne Desjardin',168,49,''), + (2012000876,'Elizabeth Reinhard',163,55,''), + (2015000965,'James McCarthy',188,70,'penicillin'), + (2020000109,'Jose Ramirez',170,70,'sulfonamide'), + (2020000222,'Stefani Ricci',170,70,'peniccilin'); +---- + +[source,sql] +---- +SELECT * FROM patients_data; +---- + +[source,sql] +---- ++----------------+---------------------+---------------+--------------+-------------+ +| record_number | patient_name | height_in_cm | weight_in_kg | allergies | ++----------------+---------------------+---------------+--------------+-------------+ +| 2009000908 | Vivienne Desjardin | 168 | 49 | null | +| 2012000876 | Elizabeth Reinhard | 163 | 55 | null | +| 2015000965 | James McCarthy | 188 | 70 | penicillin | +| 2020000109 | Jose Ramirez | 170 | 70 | sulfonamide | +| 2020000222 | Stefani Ricci | 170 | 70 | peniccilin | ++----------------+---------------------+---------------+--------------+-------------+ +---- + +For example, run the `ENDS_WITH` function but with no specified `second_argument`: + +[source,sql] +---- +SELECT allergies, ENDS_WITH(allergies, '') FROM patients_data; +---- + +The result shows that 
`ENDS_WITH` returns true for all records (even the `NULL` one): + +[source,sql] +---- ++--------------+--------------+ +| allergies | ends_with | ++--------------+--------------+ +| null | true | +| null | true | +| penicillin | true | +| sulfonamide | true | +| peniccilin | true | ++--------------+--------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/index.adoc b/modules/reference/pages/sql/sql-functions/string-functions/index.adoc new file mode 100644 index 000000000..83772e07c --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/index.adoc @@ -0,0 +1,37 @@ += Overview +:description: String functions analyze and manipulate string values. + +Use string functions to analyze and manipulate string values. Redpanda SQL supports these string related functions and operators: + +== String functions + +[width="100%",cols="20%,80%",options="header",] +|=== +|*Function* |*Description* +|xref:reference:sql/sql-functions/string-functions/length.adoc[LENGTH()] |Returns the number of characters in a string +|xref:reference:sql/sql-functions/string-functions/lower.adoc[LOWER()] |Makes string lowercase +|xref:reference:sql/sql-functions/string-functions/upper.adoc[UPPER()] |Makes string upper case +|xref:reference:sql/sql-functions/string-functions/starts-with.adoc[STARTS_WITH()] |Checks if a string starts with a specified substring +|xref:reference:sql/sql-functions/string-functions/ends-with.adoc[ENDS_WITH()] |Checks if a string ends with a specified substring +|xref:reference:sql/sql-functions/string-functions/concat.adoc[CONCAT()] |Adds two or more strings together +|xref:reference:sql/sql-functions/string-functions/substr.adoc[SUBSTR()] |Extracts a substring from a string +|xref:reference:sql/sql-functions/string-functions/strpos.adoc[STRPOS()] |Finds the position at which the substring starts within the string +|xref:reference:sql/sql-functions/string-functions/regex/regexp-match.adoc[REGEXP_MATCH()] |Matches 
a POSIX regular expression pattern to a string +|xref:reference:sql/sql-functions/string-functions/regex/regexp-replace.adoc[REGEXP_REPLACE()] |Substitutes new text for substrings that match POSIX regular expression patterns +|xref:reference:sql/sql-functions/string-functions/replace.adoc[REPLACE()] |Finds and replaces occurrences of a substring in a string +|xref:reference:sql/sql-functions/string-functions/position.adoc[POSITION()] |Returns the position of the first occurrence of a substring in a string +|=== + +== String operators + +[width="100%",cols="51%,49%",options="header",] +|=== +|*Operator* |*Description* +|text ~ text -> boolean |Returns `true` if the first argument matches the pattern of the second argument in case-sensitive match +|text ~* text -> boolean |Returns `true` if the first argument matches the pattern of the second argument in a case-insensitive match +|text !~ text -> boolean |Returns `true` if the first argument does not match the pattern of the second argument in case-sensitive match. +|text ~ text -> boolean |Returns `true` if the first argument matches the pattern of the second argument in case-sensitive match. +|text ~* text -> boolean |Returns `true` if the first argument matches the pattern of the second argument in a case-insensitive match. +|text !~ text -> boolean |Returns `true` if the first argument does not match the pattern of the second argument in case-sensitive match. +|text !~* text -> boolean |Returns `true` if the first argument does not match the pattern of the second argument in a case-insensitive match. 
+|=== diff --git a/modules/reference/pages/sql/sql-functions/string-functions/length.adoc b/modules/reference/pages/sql/sql-functions/string-functions/length.adoc new file mode 100644 index 000000000..6c6a48985 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/length.adoc @@ -0,0 +1,105 @@ += LENGTH +:description: Use the LENGTH() function to find the length of a string (for example, the number of characters in a given string). +:page-topic-type: reference + +Use the `LENGTH()` function to find the length of a string (that is, the number of characters in a given string). It accepts a string as a parameter. The syntax of the length function is: + +[source,sql] +---- +LENGTH(string) +---- + +The input type is a string, and the return type is int, as it returns the number of characters. + +*Special cases:* + +* If a `NULL` value is passed to the function (for example, `LENGTH(NULL)`), it returns `NULL`. +* If the parameter is an empty string, `LENGTH('')`, it returns 0. +* If the parameter is a single space character, `LENGTH(' ')`, it returns 1 because the string is neither empty nor `NULL`. + +== Examples + +=== Basic `LENGTH()` function + +This example uses the `LENGTH()` function to find out the length of a string text: + +[source,sql] +---- +SELECT LENGTH ('Redpanda SQL Tutorial'); +---- + +The query returns: + +[source,sql] +---- ++------------+ +| length | ++------------+ +| 21 | ++------------+ +---- + +=== `LENGTH()` function using columns + +The following example uses the `LENGTH()` function on a `personal_details` table containing `id`, `first_name`, `last_name`, and `gender` columns for retail store employees. 
+ +[source,sql] +---- +CREATE TABLE personal_details ( + id int, + first_name text, + last_name text, + gender text +); +INSERT INTO personal_details + (id, first_name, last_name, gender) +VALUES + (1,'Mark','Wheeler','M'), + (2,'Tom','Hanks','M'), + (3,'Jane','Hopper','F'), + (4,'Emily','Byers','F'), + (5,'Lucas','Sinclair','M'); +---- + +[source,sql] +---- +SELECT * FROM personal_details; +---- + +The query shows this table: + +[source,sql] +---- ++-----+-------------+-------------+----------+ +| id | first_name | last_name | gender | ++-----+-------------+-------------+----------+ +| 1 | Mark | Wheeler | M | +| 2 | Tom | Hanks | M | +| 3 | Jane | Hopper | F | +| 4 | Emily | Byers | F | +| 5 | Lucas | Sinclair | M | ++-----+-------------+-------------+----------+ +---- + +The query returns the last name and the length of the last name from the personal_details table, where the length of the last_name is greater than 5. + +[source,sql] +---- +SELECT last_name,length(last_name) +AS "Length of Last Name" +FROM personal_details +WHERE LENGTH(last_name) > 5; +---- + +The output displays all those items in the last_name column with a length of more than 5 characters: + +[source,sql] +---- ++---------------+-----------------------+ +| last_name | Length of Last Name | ++---------------+-----------------------+ +| Wheeler | 7 | +| Hopper | 6 | +| Sinclair | 8 | ++---------------+-----------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/lower.adoc b/modules/reference/pages/sql/sql-functions/string-functions/lower.adoc new file mode 100644 index 000000000..da123f7da --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/lower.adoc @@ -0,0 +1,105 @@ += LOWER +:description: The LOWER() function returns a given string, an expression, or values in a column in all lowercase letters. 
+:page-topic-type: reference + +The `LOWER()` function returns a given string, an expression, or values in a column in all lowercase letters. The syntax of the function is: + +[source,sql] +---- +LOWER(string) +---- + +It accepts input as a string and returns the text in the lowercase alphabet. + +*Special cases:* If there are characters in the input which are not of type string, they remain unaffected by the `LOWER()` function. + +[NOTE] +==== +Unicode is supported so that the ß is equivalent to the string ss. +==== + +== Examples + +=== Basic `LOWER()` function + +This basic query shows how to convert the given string to all lowercase letters: + +[source,sql] +---- +SELECT LOWER('PostGreSQL'); +---- + +The query returns: + +[source,sql] +---- ++------------+ +| lower | ++------------+ +| postgresql | ++------------+ +---- + +=== `LOWER()` function using columns + +This example shows how the `LOWER()` function works with columns. The *personal_details* table contains columns *id*, *first_name*, *last_name*, and *gender* of retail store employees. 
+ +[source,sql] +---- +CREATE TABLE personal_details ( + id int, + first_name text, + last_name text, + gender text +); +INSERT INTO personal_details + (id, first_name, last_name, gender) +VALUES + (1,'Mark','Wheeler','M'), + (2,'Tom','Hanks','M'), + (3,'Jane','Hopper','F'), + (4,'Emily','Byers','F'), + (5,'Lucas','Sinclair','M'); +---- + +[source,sql] +---- +SELECT * FROM personal_details; +---- + +This query shows the table: + +[source,sql] +---- ++-----+-------------+-------------+----------+ +| id | first_name | last_name | gender | ++-----+-------------+-------------+----------+ +| 1 | Mark | Wheeler | M | +| 2 | Tom | Hanks | M | +| 3 | Jane | Hopper | F | +| 4 | Emily | Byers | F | +| 5 | Lucas | Sinclair | M | ++-----+-------------+-------------+----------+ +---- + +Assume that the goal is to convert the first and last names of employees with *id* numbers 2, 4, and 5 to all lowercase letters: + +[source,sql] +---- +SELECT first_name,last_name,LOWER(first_name),LOWER(last_name) +FROM personal_details +where id in (2, 4, 5); +---- + +The output displays the first and last names of employees with the specified ids in lowercase letters: + +[source,sql] +---- ++------------+-------------+----------+----------+ +| first_name | last_name | lower | lower | ++------------+-------------+----------+----------+ +| Tom | Hanks | tom | hanks | +| Emily | Byers | emily | byers | +| Lucas | Sinclair | lucas | sinclair | ++------------+-------------+----------+----------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/position.adoc b/modules/reference/pages/sql/sql-functions/string-functions/position.adoc new file mode 100644 index 000000000..28053257e --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/position.adoc @@ -0,0 +1,90 @@ += POSITION +:description: The POSITION() function returns the position of the first occurrence of a substring in a string. 
+:page-topic-type: reference + +The `POSITION()` function returns the position of the first occurrence of a substring in a string. It works the same as xref:reference:sql/sql-functions/string-functions/strpos.adoc[STRPOS], but it has slightly different syntax. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +POSITION(substring IN string) +---- + +The position of the substring within the string starts from 1. If the substring is not found, it returns 0. + +== Examples + +=== Example 1 + +This query looks for the position of the substring `world` within the string `Hello, world!`. + +[source,sql] +---- +SELECT POSITION('world' IN 'Hello, world!'); +---- + +The result would be the starting position of the substring `world`, which is 8. + +[source,sql] +---- +position +---------- + 8 +---- + +=== Example 2 + +The query looks for the position of the substring `123` within the string `1a2b3c`. + +[source,sql] +---- +SELECT POSITION('123' IN '1a2b3c'); +---- + +The characters of `123` appear in `1a2b3c`, but not as a contiguous substring, so the result would be 0. + +[source,sql] +---- +position +---------- + 0 +---- + +=== Example 3 + +The query tries to find the position of the substring `abc` within the string `xyz`. + +[source,sql] +---- +SELECT POSITION('abc' IN 'xyz'); +---- + +`abc` is not found in `xyz`, so the result would be 0. + +[source,sql] +---- +position +---------- + 0 +---- + +=== Example 4 + +This query searches for the position of the substring `cde` within the string `cde`. + +[source,sql] +---- +SELECT POSITION('cde' IN 'cde'); +---- + +`cde` is the entire string, so the result would be 1. 
+ +[source,sql] +---- +position +---------- + 1 +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/regex/index.adoc b/modules/reference/pages/sql/sql-functions/string-functions/regex/index.adoc new file mode 100644 index 000000000..ebdccff46 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/regex/index.adoc @@ -0,0 +1,3 @@ += Regular Expressions +:description: Reference for regular expression functions in Redpanda SQL. +:page-layout: index diff --git a/modules/reference/pages/sql/sql-functions/string-functions/regex/posix-regular-expressions.adoc b/modules/reference/pages/sql/sql-functions/string-functions/regex/posix-regular-expressions.adoc new file mode 100644 index 000000000..9341fa2bd --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/regex/posix-regular-expressions.adoc @@ -0,0 +1,20 @@ += POSIX Regular Expressions +:description: POSIX (Portable Operating System Interface) defines a set of standard operating system interfaces based on the UNIX OS. +:page-topic-type: reference + +*POSIX* (Portable Operating System Interface) defines a set of standard operating system interfaces based on the UNIX OS. In POSIX Basic Regular Expression (BRE) syntax, most characters are treated as literals (that is, they match only themselves). However, some characters called *metacharacters* have special meaning. + +This table describes common POSIX BRE metacharacters: + +[width="100%",cols="38%,62%",options="header",] +|=== +|*Metacharacter* |*Description* +|`.` |Matches any single character. For example, `a.c` matches "`*abc*`", but `[a.c]` matches only "`*a*`", "`*.*`", or "`*c*`" +|`-` |Used to define a range. 
For example, `[a-c]` will match characters *a* to *c* (both inclusive) +|`[]` |Defines a bracket expression that matches any single character listed between the brackets +|`^` |Anchors the match to the beginning of the string. As the first character inside a bracket expression, it negates the set +|`$` |Anchors the match to the end of the string +|`*` |Matches zero or more occurrences of the preceding element +|`\{n}` |Matches exactly _n_ occurrences of the preceding element +|`{n,m}` |Matches at least _n_ and at most _m_ occurrences of the preceding element +|=== diff --git a/modules/reference/pages/sql/sql-functions/string-functions/regex/regexp-match.adoc b/modules/reference/pages/sql/sql-functions/string-functions/regex/regexp-match.adoc new file mode 100644 index 000000000..f2158f9c2 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/regex/regexp-match.adoc @@ -0,0 +1,138 @@ += REGEXP_MATCH() +:description: The REGEXP_MATCH() function matches a POSIX regular expression pattern to a string. +:page-topic-type: reference + +The `REGEXP_MATCH()` function matches a POSIX regular expression pattern to a string. It returns an array of `TEXT[]` type with substring(s) of matched groups within the first match. + +== Syntax + +The syntax for `REGEXP_MATCH()` function is: + +[source,sql] +---- +REGEXP_MATCH(source_string, pattern, [flags]) +---- + +== Parameters + +* `source_string`: String on which to perform the match. +* `pattern`: POSIX regular expression pattern to match. +* `flags`: Optional. Flags that change the matching behavior of `REGEXP_MATCH()`. + +The `flags` parameter is an optional string that controls how the function operates. Here is a list of flags that are supported by Redpanda SQL: + +* `i`: Use this flag for case-insensitive matching. +* `c`: `REGEXP_MATCH()` function is case-sensitive by default, using the `c` flag has the same effect as having no flags at all. 
+ +[NOTE] +==== +If using multiple flags, the last one takes precedence. If using the `ci` flags, the regex will be case-insensitive, while using the `ic` flags it will be case-sensitive. +==== + +== Examples + +=== Basic Usage + +These examples demonstrate how to find the first occurrence of an email address in the input string: + +[source,sql] +---- +SELECT REGEXP_MATCH('Contact us at hello@example.com', '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'); +---- + +[source,sql] +---- + regexp_match +---------------------- + {hello@example.com} +(1 row) +---- + +=== Match multiple groups + +The `REGEXP_MATCH()` function can capture multiple groups within a match, which extracts key parts from a string in a structured way. This example shows how to extract the protocol, domain and path from a given URL: + +[source,sql] +---- +SELECT REGEXP_MATCH('https://www.example.com/products/item123', '(https?)://([\w.-]+)/(.+)'); +---- + +[source,sql] +---- + regexp_match +------------------------------------------ + {https,www.example.com,products/item123} +(1 row) +---- + +=== Case-insensitive matching + +This example shows how to match a pattern regardless of case-sensitivity: + +[source,sql] +---- +SELECT REGEXP_MATCH('User.Name@Example.COM', '@([a-z0-9.-]+)$', 'i'); +---- + +[source,sql] +---- + regexp_match +--------------- + {Example.COM} +(1 row) +---- + +=== Match with patterns stored in a table + +This example shows how to take the source string and regex pattern directly from the table. 
First, create two sample tables: + +[source,sql] +---- +CREATE TABLE users ( + email TEXT NOT NULL +); + +CREATE TABLE patterns ( + id INT, + regex_pattern TEXT NOT NULL +); +---- + +Once that is done, insert values into those tables: + +[source,sql] +---- +INSERT INTO users (email) VALUES + ('user@example.com'), + ('admin@test.org'), + ('invalid-email@wrong'); + +INSERT INTO patterns (id, regex_pattern) VALUES + (0, '^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$'); +---- + +Validate whether user emails in the `users` table are valid. If the regex doesn't match, a `NULL` value is returned. + +[source,sql] +---- +SELECT users.email, + patterns.regex_pattern, + REGEXP_MATCH(users.email, patterns.regex_pattern, 'i') AS is_valid +FROM users +JOIN patterns ON patterns.id = 0; +---- + +[source,sql] +---- + email | regex_pattern | is_valid +---------------------+-----------------------------------------+-------------------- + user@example.com | ^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$ | {user@example.com} + admin@test.org | ^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$ | {admin@test.org} + invalid-email@wrong | ^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$ | +(3 rows) +---- + +=== Restrictions + +* The function returns `NULL` if it cannot match the regular expression pattern. +* `i` and `c` flags shouldn't be used with each other diff --git a/modules/reference/pages/sql/sql-functions/string-functions/regex/regexp-replace.adoc b/modules/reference/pages/sql/sql-functions/string-functions/regex/regexp-replace.adoc new file mode 100644 index 000000000..170428a90 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/regex/regexp-replace.adoc @@ -0,0 +1,156 @@ += REGEXP_REPLACE() +:description: The REGEXP_REPLACE() function replaces all occurrences of a regular expression pattern in a string with a specified replacement string. 
+:page-topic-type: reference + +The `REGEXP_REPLACE()` function replaces occurrences of a regular expression pattern in a string with a specified replacement string. By default, only the first occurrence is replaced; specify the `g` flag to replace all occurrences. + +== Syntax + +The syntax for `REGEXP_REPLACE()` function is: + +[source,sql] +---- +REGEXP_REPLACE(source_string, pattern, replacement, [flags]) +---- + +== Parameters + +* `source_string`: String on which to perform the replacement. +* `pattern`: POSIX regular expression pattern to match. +* `replacement`: Replacement string. +* `flags`: Optional. Flags that change the matching behavior of `REGEXP_REPLACE()`. + +The `flags` parameter is an optional string that controls how the function operates. Here is a list of flags supported in Redpanda SQL: + +* `g`: Global replacement. This flag ensures that all occurrences of the pattern are replaced. +* `i`: Use this flag for case-insensitive matching. +* `c`: `REGEXP_REPLACE()` function is case-sensitive by default, using the `c` flag has the same effect as using no flags. + +== Examples + +=== Basic function usage + +This example uses the `REGEXP_REPLACE()` function with a basic POSIX regular expression pattern: + +[source,sql] +---- +SELECT REGEXP_REPLACE('The SQL ENGINE supports various data types', 'T[^ ]*', 'The') AS "Replaced_String"; +---- + +The query returns: + +[source,sql] +---- + Replaced_String +----------------------------------------- + The SQL ENGINE supports various data types +---- + +The pattern used was *"`T[^ ]*`"*, which matches any substring that starts with a '`T`' character, followed by any number of non-space characters. The function replaces the matched substring with the specified replacement string *"`The`"*.
+ +=== Replace special characters + +This example demonstrates how to replace a non-alphanumeric character in a string with a tilde (~): + +[source,sql] +---- +SELECT REGEXP_REPLACE('Hello World!', '[^A-Za-z0-9 ]', '~') AS "Replaced_String"; +---- + +In this query, the second parameter is a regular expression *"[^A-Za-z0-9 ]"* that matches any character that is not an uppercase or lowercase letter, a digit, or a space. Output: + +[source,sql] +---- + Replaced_String +------------------- + Hello World~ +---- + +=== Flag usage + +==== Replace certain substrings with a single flag defined + +This example uses the `REGEXP_REPLACE()` function with a defined flag to replace certain substrings in a string. First, create a sample `quotes` table: + +[source,sql] +---- +CREATE TABLE quotes (quotes_text text); +INSERT INTO quotes (quotes_text) +VALUES ('Work hard and stay hungry. Lazy people get nowhere in life.'), + ('An excuse is a way for a LAZY person to feel better.'), + ('The word LUCKY is how a lazy person describes someone who works hard.'); + +SELECT quotes_text FROM quotes; +---- + +The query returns: + +[source,bash] +---- + quotes_text +----------------------------------------------------------------------- + Work hard and stay hungry. Lazy people get nowhere in life. + An excuse is a way for a LAZY person to feel better. + The word LUCKY is how a lazy person describes someone who works hard.
+(3 rows) +---- + +Now, use the `REGEXP_REPLACE()` function with the `i` flag specified to replace all occurrences of the word `lazy` with `active` regardless of case sensitivity: + +[source,sql] +---- +SELECT quotes_text, REGEXP_REPLACE(quotes_text, 'lazy', 'active', 'i') AS "New quotes" FROM quotes; +---- + +In this case, all occurrences of the word `lazy` have been replaced with `active`: + +[source,bash] +---- + quotes_text | New quotes +-----------------------------------------------------------------------+------------------------------------------------------------------------- + Work hard and stay hungry. Lazy people get nowhere in life. | Work hard and stay hungry. active people get nowhere in life. + An excuse is a way for a LAZY person to feel better. | An excuse is a way for a active person to feel better. + The word LUCKY is how a lazy person describes someone who works hard. | The word LUCKY is how a active person describes someone who works hard. +(3 rows) +---- + +=== Specify one or more flags + +Without specifying the `g` flag, `REGEXP_REPLACE()` function replaces only the first occurrence of a substring: + +[source,sql] +---- +SELECT REGEXP_REPLACE('ab12c', '[0-9]', 'X'); +---- + +[source,sql] +---- + regexp_replace +---------------- + abX2c +---- + +In this case, only the first digit (`1`) was replaced with `X`. By adding the `g` flag, all occurrences are replaced with `X`: + +[source,sql] +---- +SELECT REGEXP_REPLACE('ab12c', '[0-9]', 'X', 'g'); +---- + +[source,sql] +---- + regexp_replace +---------------- + abXXc +---- + +[NOTE] +==== +If using multiple flags, the last one takes precedence. If using the `ci` flags, the regex will be case-insensitive, while using the `ic` flags it will be case-sensitive +==== + +== Restrictions + +* The function returns `NULL` if there are no input rows or `NULL` values. 
+* If the regular expression pattern isn't found in the string, the `REGEXP_REPLACE()` function returns the original string +* `i` and `c` flags shouldn't be used with each other diff --git a/modules/reference/pages/sql/sql-functions/string-functions/replace.adoc b/modules/reference/pages/sql/sql-functions/string-functions/replace.adoc new file mode 100644 index 000000000..e376a6db2 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/replace.adoc @@ -0,0 +1,152 @@ += REPLACE() +:description: The REPLACE() function looks for and replaces a substring with a new one in a string. +:page-topic-type: reference + +The `REPLACE()` function looks for and replaces a substring with a new one in a string. This function is often used to update the outdated or spelling mistakes in data that require an amendment. + +[NOTE] +==== +Redpanda SQL also supports the xref:reference:sql/sql-functions/string-functions/regex/regexp-replace.adoc[`REGEXP_REPLACE()`] function. It searches and replaces a substring that matches with a POSIX regular expression +==== + +== Syntax + +The syntax for `REPLACE()` function is: + +[source,sql] +---- +REPLACE(string, old_substring, new_substring) +---- + +[WARNING] +==== +The `REPLACE()` function performs a case-sensitive replacement +==== + +=== Parameters + +The syntax requires these parameters: + +* `string`: String to replace. +* `old_substring`: Substring to replace. All occurrences in the string are replaced. +* `new_substring`: New substring that will replace the old one. + +== Examples + +=== Basic usage + +This example demonstrates a basic usage of the `REPLACE()` function. 
+ +[source,sql] +---- +SELECT REPLACE ('NewDatabase', 'New', 'Redpanda'); +---- + +The `REPLACE()` function finds all occurrences of the '`New`' substring in the '`NewDatabase`' string and replaces them with the '`Redpanda`' substring, producing: + +[source,sql] +---- ++---------------------+ +| f | ++---------------------+ +| RedpandaDatabase | ++---------------------+ +---- + +=== Replace specified values in a table + +This example shows how to replace the values of a specific column in a table. First, create a new table named *hobby* with *club* and *category* columns and insert the values into the respective columns. + +[source,sql] +---- +CREATE TABLE hobby ( + club text, + category text +); +INSERT INTO hobby + (club, category) +VALUES + ('Bridge','group'), + ('Painting','individual'), + ('Basketball','group'), + ('Volleyball','group'); +---- + +After that is completed, retrieve all values from the table using this query: + +[source,sql] +---- +SELECT * FROM hobby; +---- + +[source,sql] +---- ++------------+---------------+ +| club | category | ++------------+---------------+ +| Bridge | group | +| Painting | individual | +| Basketball | group | +| Volleyball | group | ++------------+---------------+ +---- + +This query replaces the *'`group`'* values in the *category* column with *'`sports`'*: + +[source,sql] +---- +SELECT REPLACE(category, 'group', 'sports') from hobby; +---- + +[source,sql] +---- ++--------------+ +| f | ++--------------+ +| sports | +| individual | +| sports | +| sports | ++--------------+ +---- + +=== Remove a substring from a string + +This example shows how to remove a substring from a string using the `REPLACE()` function.
In this case, the goal is to find all occurrences of the '`Friends`' substring in the '`Hello Friends`' string and remove it: + +[source,sql] +---- +SELECT REPLACE('Hello Friends', 'Friends', ''); +---- + +[source,sql] +---- ++-----------+ +| f | ++-----------+ +| Hello | ++-----------+ +---- + +=== Replace multiple patterns + +This example uses the `REPLACE()` function to replace multiple patterns of the given string: + +[source,sql] +---- +SELECT REPLACE(REPLACE(REPLACE(REPLACE('2*[9-5]/{4+8}', '[', '('), ']', ')'), '{', '('), '}', ')'); +---- + +The `REPLACE()` function is called multiple times to replace the corresponding string as specified: + +* *`[]`* into *`()`* +* *`{}`* into *`()`* + +[source,sql] +---- ++------------------+ +| f | ++------------------+ +| 2*(9-5)/(4+8) | ++------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/starts-with.adoc b/modules/reference/pages/sql/sql-functions/string-functions/starts-with.adoc new file mode 100644 index 000000000..7f32b0d24 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/starts-with.adoc @@ -0,0 +1,155 @@ += STARTS_WITH +:description: The STARTS_WITH() function determines whether the first argument starts with a specified string in the second argument or not. +:page-topic-type: reference + +The `STARTS_WITH()` function determines whether the first argument starts with a specified string in the second argument or not. + +[source,sql] +---- +STARTS_WITH(first_argument, 'second_argument') +---- + +* `first_argument`: The search reference. Can be a string or a column name. +* `second_argument`: The specified argument, which will have the search keywords. + +The input type will be `STRING`, and the return type is `BOOL`, shown as `true` or `false`. + +Special case: + +* Returns `NULL` for the `NULL` record. +* Returns `true` (including the `NULL` record) if the `second_argument` is not specified. 
+ +== Examples + +=== `STARTS_WITH()` function using column + +Consider a table named *petsData*. + +[source,sql] +---- +CREATE TABLE petsData ( + petid int, + petname text, + species text, + breed text, + sex text, + age int +); +INSERT INTO petsData + (petid, petname, species, breed, sex, age) +VALUES + (2021001,'Bartholomeow','cat','persian','m',2), + (2021004,'Jack','dog','boston terrier','m',1), + (2022001,'Jesse','hamster','dzungarian','m',1), + (2022010,'Bella','dog','dobberman','f',3), + (2022011,'June','cat','american shorthair','f',2); +---- + +[source,sql] +---- +SELECT * FROM petsData; +---- + +This query shows the table: + +[source,sql] +---- ++----------+--------------+----------+---------------------+------+-----+ +| petid | petname | species | breed | sex | age | ++----------+--------------+----------+---------------------+------+-----+ +| 2021001 | Bartholomeow | cat | persian | m | 2 | +| 2021004 | Jack | dog | boston terrier | m | 1 | +| 2022001 | Jesse | hamster | dzungarian | m | 1 | +| 2022010 | Bella | dog | dobberman | f | 3 | +| 2022011 | June | cat | american shorthair | f | 2 | ++----------+--------------+----------+---------------------+------+-----+ +---- + +This query checks whether each value in the *petname* column starts with "J": + +[source,sql] +---- +SELECT petname, STARTS_WITH(petname, 'J') FROM petsData; +---- + +This returns `true` for each pet whose name starts with the letter J. Otherwise, it returns `false`. + +[source,sql] +---- ++---------------+--------------+ +| petname | starts_with | ++---------------+--------------+ +| Bartholomeow | false | +| Jack | true | +| Jesse | true | +| Bella | false | +| June | true | ++---------------+--------------+ +---- + +=== `STARTS_WITH()` function with no specified argument + +The *petsData* table has a `NULL` value in the breed column.
+ +[source,sql] +---- +CREATE TABLE petsData ( + petid int, + petname text, + species text, + breed text, + sex text, + age int +); +INSERT INTO petsData + (petid, petname, species, breed, sex, age) +VALUES + (2021001,'Bartholomeow','cat','persian','m',2), + (2021004,'Jack','dog','boston terrier','m',1), + (2022001,'Jesse','hamster','dzungarian','m',1), + (2022010,'Bella','dog','dobberman','f',3), + (2022011,'June','cat','american shorthair','f',2), + (2022012,'Phoebe','gold fish','','f',1); +---- + +[source,sql] +---- +SELECT * FROM petsData; +---- + +[source,sql] +---- ++----------+--------------+------------+---------------------+------+------+ +| petid | petname | species | breed | sex | age | ++----------+--------------+------------+---------------------+------+------+ +| 2021001 | Bartholomeow | cat | persian | m | 2 | +| 2021004 | Jack | dog | boston terrier | m | 1 | +| 2022001 | Jesse | hamster | dzungarian | m | 1 | +| 2022010 | Bella | dog | dobberman | f | 3 | +| 2022011 | June | cat | american shorthair | f | 2 | +| 2022012 | Phoebe | gold fish | | f | 1 | ++----------+--------------+------------+---------------------+------+------+ +---- + +For example, run the `STARTS_WITH()` function with an empty string as the `second_argument`: + +[source,sql] +---- +SELECT breed, STARTS_WITH(breed, '') FROM petsData; +---- + +This result shows that `STARTS_WITH()` returns `true` for all records (even the `NULL` one): + +[source,sql] +---- ++---------------------+--------------+ +| breed | starts_with | ++---------------------+--------------+ +| persian | true | +| boston terrier | true | +| dzungarian | true | +| dobberman | true | +| american shorthair | true | +| null | true | ++---------------------+--------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/strpos.adoc b/modules/reference/pages/sql/sql-functions/string-functions/strpos.adoc new file mode 100644 index 000000000..cdeba0eb0 --- /dev/null +++ 
b/modules/reference/pages/sql/sql-functions/string-functions/strpos.adoc @@ -0,0 +1,113 @@ += STRPOS +:description: Use the STRPOS() function to return the position at which the substring (the second argument) is first matched in the string (the first argument). +:page-topic-type: reference + +Use the `STRPOS()` function to return the position at which the substring (the second argument) is first matched in the string (the first argument). + +[source,sql] +---- +STRPOS(string, substring) +---- + +Both inputs must be of type `string`. The function returns the position as an integer. + +*Special cases:* + +* Returns `NULL` if there are no input rows or `NULL` values. +* If the `substring` is not found in the string, then the `STRPOS()` function will return 0. + +== Examples + +=== Basic `STRPOS()` function + +This example shows how to find the *ut* (substring) position in the *computer* (string): + +[source,sql] +---- +SELECT STRPOS('computer', 'ut') AS "Position of ut"; +---- + +The result shows that *ut* is located at the fifth character of *computer*: + +[source,sql] +---- ++-----------------+ +| Position of ut | ++-----------------+ +| 5 | ++-----------------+ +---- + +=== STRPOS() function using column + +The *listofwords* table stores word data: + +[source,sql] +---- +CREATE TABLE listofwords ( + words text +); +INSERT INTO listofwords + (words) +VALUES + ('corral'), + ('traditionally'), + ('real'), + ('communal'), + ('challenge'), + ('fall'), + ('wall'), + ('gallop'), + ('albatross'); +---- + +[source,sql] +---- +SELECT * FROM listofwords; +---- + +The preceding query shows the table: + +[source,sql] +---- ++----------------+ +| words | ++----------------+ +| corral | +| traditionally | +| real | +| communal | +| challenge | +| fall | +| wall | +| gallop | +| albatross | ++----------------+ +---- + +This query returns each word and the position of the substring *al* within it, using the `STRPOS()` function: + +[source,sql] +---- +SELECT words, STRPOS(words, 'al') AS "Position of al" +FROM listofwords; +---- + +The 
result displays the *al* position of different words: + +[source,sql] +---- ++----------------+------------------+ +| words | Position of al | ++----------------+------------------+ +| corral | 5 | +| traditionally | 10 | +| real | 3 | +| communal | 7 | +| challenge | 3 | +| fall | 2 | +| wall | 2 | +| gallop | 2 | +| albatross | 1 | ++----------------+------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/substr.adoc b/modules/reference/pages/sql/sql-functions/string-functions/substr.adoc new file mode 100644 index 000000000..c6868b14d --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/substr.adoc @@ -0,0 +1,144 @@ += SUBSTR +:description: The SUBSTR() function extracts a specific number of characters from a string. +:page-topic-type: reference + +The `SUBSTR()` function extracts a specific number of characters from a string. + +== Syntax + +The syntax of the function is: + +*2 Arguments* + +[source,sql] +---- +substr( string, start_position) +---- + +*3 Arguments* + +[source,sql] +---- +substr( string, start_position, length ) +---- + +[TIP] +==== +Both syntaxes will have input and return of type `string`. +==== + +=== Start position + +Use the `start_position` as the starting position, specifying the part from where the substring is to be returned. It is written as an integer value. + +[width="100%",cols="36%,64%",options="header",] +|=== +|*Input* |*Return* +|`start_position > 0` and `start_position < string` |The `start_position` is a given character in the string. The count starts from the first character. +|`start_position > string` |Returns an empty substring. +|`start_position` = negative value |The count starts from the provided negative value, with subsequent characters yielded as it approaches 0. +|=== + +If the index is less than or equal to 0, no characters are returned. + +Once it exceeds 0, characters from the string are yielded, starting from the first one.
+ +=== Length + +Use the `length` argument to determine the number of characters to be extracted. It can be one or more characters. + +[width="100%",cols="20%,80%",options="header",] +|=== +|*Input* |*Return* +|`length` = 0 |Returns an empty substring. +|`length` is not set |The function will start from the specified `start_position` and end at the last character of the `string`. +|`length` = negative value |Returns an error. +|=== + +== Examples + +=== `SUBSTR()` function with specified `start_position` & `length` + +In this example, the `start_position` is set to the sixth character and five characters are extracted: + +[source,sql] +---- +SELECT substr('Watermelon',6,5) AS "Fruit"; +---- + +The query returns: + +[source,sql] +---- +Fruit +------- + melon +---- + +=== `SUBSTR()` function with `length` = 0 + +This query extracts a string with `length` = 0: + +[source,sql] +---- +SELECT substr('Watermelon',6,0) AS "Fruit"; +---- + +This displays an empty output because the specified `length` is 0: + +[source,sql] +---- +Fruit +------- +---- + +=== `SUBSTR()` function with `length` = negative value + +This example checks if the `length` is specified with a negative value: + +[source,sql] +---- +SELECT substr('Watermelon',6,-2) AS "Fruit"; +---- + +Instead of extracting the string from the last characters, it returns an error: + +[source,sql] +---- +ERROR: Length of substring cannot be negative +---- + +=== `SUBSTR()` function with `start_position` > `string` + +The string *Watermelon* has only ten characters. This example shows what happens when the specified `start_position` is larger than the number of characters in the string: + +[source,sql] +---- +SELECT substr('Watermelon',20,2) AS "Fruit"; +---- + +This displays an empty output: + +[source,sql] +---- +Fruit +------- +---- + +=== `SUBSTR()` function with 2 arguments + +In this example, the `start_position` is set to the sixth character and no `length` is specified, so the substring runs to the end of the string.
+ +[source,sql] +---- +SELECT substr('database', 6) AS "Result"; +---- + +This displays the substring from position 6: + +[source,sql] +---- +Result +-------- + ase +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/substring.adoc b/modules/reference/pages/sql/sql-functions/string-functions/substring.adoc new file mode 100644 index 000000000..e437ed16c --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/substring.adoc @@ -0,0 +1,50 @@ += SUBSTRING +:description: SUBSTR is an alias for SUBSTRING. +:page-topic-type: reference + +[WARNING] +==== +SUBSTR is an alias for SUBSTRING. Learn more in the xref:reference:sql/sql-functions/string-functions/substr.adoc[SUBSTR] documentation. +==== +The `SUBSTRING()` function lets you extract a part of a string and return that substring. + +== Syntax + +Here are the 2 basic syntaxes of the `SUBSTRING()` function in Redpanda SQL: + +*2 Arguments* + +[source,sql] +---- +SUBSTRING( string, start_position ) +---- + +*3 Arguments* + +[source,sql] +---- +SUBSTRING(string, start_position, length) +---- + +[TIP] +==== +Both syntaxes will have input and return of type `string`. +==== + +== Examples + +This example shows how to use the `SUBSTRING()` function to extract the first 8 characters from the string: + +[source,sql] +---- +SELECT SUBSTRING('RedpandaDocumentation', 1, 8); +---- + +This displays the substring from position 1: + +[source,sql] +---- +substring +----------- + Redpanda +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/upper.adoc b/modules/reference/pages/sql/sql-functions/string-functions/upper.adoc new file mode 100644 index 000000000..94fe1db62 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/upper.adoc @@ -0,0 +1,109 @@ += UPPER +:description: The UPPER() function returns a given string, an expression, or values in a column in all uppercase letters.
+:page-topic-type: reference + +The `UPPER()` function returns a given string, an expression, or values in a column in all uppercase letters: + +[source,sql] +---- +UPPER(string) +---- + +It accepts input as a string and returns text in uppercase letters. + +*Special Case:* + +* If characters in the input are not of type string, they remain unaffected by the `UPPER()` function. +* Unicode is supported for the `UPPER()` function. + +== Examples + +=== Basic `UPPER()` function + +This basic query converts the given string to all uppercase letters: + +[source,sql] +---- +SELECT UPPER('PostGreSQL'); +---- + +The query returns: + +[source,sql] +---- ++-------------+ +| upper | ++-------------+ +| POSTGRESQL | ++-------------+ +---- + +=== UPPER() function using columns and CONCAT() function + +This example shows how the `UPPER()` function works with columns. A table named *personal_details* contains employee's *id*, *first_name*, *last_name*, and *gender* of a retail store: + +[source,sql] +---- +CREATE TABLE personal_details ( + id int, + first_name text, + last_name text, + gender text +); +INSERT INTO personal_details + (id, first_name, last_name, gender) +VALUES + (1,'Mark','Wheeler','M'), + (2,'Tom','Hanks','M'), + (3,'Jane','Hopper','F'), + (4,'Emily','Byers','F'), + (5,'Lucas','Sinclair','M'); +---- + +[source,sql] +---- +SELECT * FROM personal_details; +---- + +The query returns: + +[source,sql] +---- ++-----+-------------+-------------+----------+ +| id | first_name | last_name | gender | ++-----+-------------+-------------+----------+ +| 1 | Mark | Wheeler | M | +| 2 | Tom | Hanks | M | +| 3 | Jane | Hopper | F | +| 4 | Emily | Byers | F | +| 5 | Lucas | Sinclair | M | ++-----+-------------+-------------+----------+ +---- + +Assume that: + +. The goal is to convert employees' first and last names with *id* numbers 1, 3, and 5 to all uppercase letters. +. Then, combine them using the `CONCAT()` function into one *full_name* column in uppercase. 
++ +Use this query: ++ +[source,sql] +---- +SELECT CONCAT (UPPER(first_name),' ', UPPER(last_name)) +as full_name +FROM personal_details +where id in (1, 3, 5); +---- ++ +The output displays the first and last names of employees with the specified ids in uppercase letters: ++ +[source,sql] +---- ++---------------------+ +| full_name | ++---------------------+ +| MARK WHEELER | +| JANE HOPPER | +| LUCAS SINCLAIR | ++---------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/current-timestamp.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/current-timestamp.adoc new file mode 100644 index 000000000..b2bbc88d9 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/current-timestamp.adoc @@ -0,0 +1,63 @@ += CURRENT_TIMESTAMP +:description: The CURRENT_TIMESTAMP() returns the current timestamp value representing the date and time the query was executed. +:page-topic-type: reference + +The `CURRENT_TIMESTAMP()` returns the current timestamp value representing the date and time the query was executed. + +[NOTE] +==== +Note that the time returned by this function is the time when the query was executed. +==== + +== Syntax + +[source,sql] +---- +CURRENT_TIMESTAMP +CURRENT_TIMESTAMP() +CURRENT_TIMESTAMP(precision) +---- + +== Arguments + +* `precision`: Optional. An integer literal from 0 to 6 that controls how many fractional-second digits to include. When omitted, the function returns full microsecond precision (6 digits). 
+ +== Examples + +=== Default precision + +This example returns the current date and time with full microsecond precision: + +[source,sql] +---- +SELECT CURRENT_TIMESTAMP AS "Current Time"; +---- + +The final result will display the current date and time in the timezone in which it was issued: + +[source,sql] +---- +----------------------------- + Current Time +----------------------------- + 2022-08-31 16:56:06.464016 +----------------------------- +---- + +=== Truncated precision + +This example returns the current timestamp with no fractional seconds: + +[source,sql] +---- +SELECT CURRENT_TIMESTAMP(0) AS "Current Time"; +---- + +[source,sql] +---- +--------------------- + Current Time +--------------------- + 2022-08-31 16:56:06 +--------------------- +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/date-trunc.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/date-trunc.adoc new file mode 100644 index 000000000..4facdc7a0 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/date-trunc.adoc @@ -0,0 +1,160 @@ += DATE_TRUNC +:description: The DATE_TRUNC() function truncates a timestamp, timestamp with time zone or interval value to the specified precision, effectively rounding down the value to the start of the given time unit. +:page-topic-type: reference + +The `DATE_TRUNC()` function truncates a timestamp, timestamp with time zone or interval value to the specified precision, effectively rounding down the value to the start of the given time unit. The return type matches the input type. + +== Syntax + +The syntax for using the `DATE_TRUNC()` function is: + +.Without time_zone +[source,sql] +---- +DATE_TRUNC(field, source) +---- + +.With time_zone +[source,sql] +---- +DATE_TRUNC(field, source, time_zone) +---- + +== Parameters + +* `field`: The unit of time used to truncate the `source` value. Type: `text`. Case-insensitive. +* `source`: The value to truncate. 
Must be of type `INTERVAL`, `TIMESTAMP`, or `TIMESTAMP WITH TIME ZONE`. +* `time_zone`: Optional. Time zone for the operation. Type: `text`. Used only with the second syntax form. + +== Fields + +Here is a list of supported values for the `field` parameter in the `DATE_TRUNC()` syntax. + +* `microseconds` +* `milliseconds` +* `second` +* `minute` +* `hour` +* `day` +* `week` +* `month` +* `quarter` +* `year` +* `decade` +* `century` +* `millennium` + +[NOTE] +==== +Some fields like `microseconds` and `milliseconds` are supported only for interval types. +==== + +== Examples + +=== Truncate to year + +This example truncates the timestamp to the year level. + +[source,sql] +---- +select DATE_TRUNC('year', '1911-12-02 19:40:00'::timestamp); +---- + +The timestamp **"1911-12-02 19:40:00"** has been truncated to 1911, with the month and day set to January 1st. + +[source,sql] +---- + date_trunc +---------------------------- + 1911-01-01 00:00:00.000000 +---- + +=== Truncate to day + +This query truncates the timestamp *"`1911-12-02 19:40:00`"* to the day level. + +[source,sql] +---- +select DATE_TRUNC('day', '1911-12-02 19:40:00'::timestamp); +---- + +The timestamp has been truncated to the start of the day: the year, month, and day components are kept, and the time components are reset to zero. + +[source,sql] +---- + date_trunc +---------------------------- + 1911-12-02 00:00:00.000000 +---- + +=== Truncate to week + +This query truncates the timestamp *"`1911-12-02 19:40:00`"* to the week level. + +[source,sql] +---- +select DATE_TRUNC('week', '1911-12-02 19:40:00'::timestamp); +---- + +The timestamp has been truncated to the start of the week containing the date, which is Monday, November 27, 1911, at 00:00:00. + +[source,sql] +---- + date_trunc +---------------------------- + 1911-11-27 00:00:00.000000 +---- + +=== Truncate to quarter + +This query truncates the timestamp *"`1911-12-02 19:40:00`"* to the quarter level. 
+ +[source,sql] +---- +select DATE_TRUNC('quarter', '1911-12-02 19:40:00'::timestamp); +---- + +The timestamp is truncated to the start of the quarter. The month and day are set to the first month and first day of the quarter, with time components reset to zero. + +[source,sql] +---- + date_trunc +---------------------------- + 1911-10-01 00:00:00.000000 +---- + +=== Truncate to hour + +This query truncates the interval *"`15 hours 10 minutes`"* to the hour precision. + +[source,sql] +---- +select DATE_TRUNC('hour', '15 hour 10 minutes'::interval); +---- + +The minutes and seconds components are set to zero, resulting in an interval of exactly 15 hours. + +[source,sql] +---- + date_trunc +----------------- + 15:00:00.000000 +---- + +=== Truncate to quarter (interval) + +This query truncates the interval *"`16 years 4 months`"* to the quarter-year level. + +[source,sql] +---- +select DATE_TRUNC('quarter', '16 years 4 months'::interval); +---- + +The interval is truncated down to a whole quarter-year unit. The months component is adjusted to the start of the quarter. Since each quarter consists of 3 months, 4 months is truncated down to 3 months, resulting in: + +[source,sql] +---- + date_trunc +----------------- + 16 years 3 mons +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/extract.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/extract.adoc new file mode 100644 index 000000000..6bcead654 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/extract.adoc @@ -0,0 +1,94 @@ += EXTRACT +:description: The EXTRACT() function retrieves a specified part (field) from a given date/time or interval value. +:page-topic-type: reference + +The `EXTRACT()` function retrieves a specified part (field) from a given date/time or interval value. It is commonly used to obtain components such as year, month, day, and hour from timestamps or dates. 
+ +== Syntax + +[source,sql] +---- +EXTRACT (field FROM source) +---- + +== Parameters + +* `field`: String or identifier specifying the part of the date / time to extract. +* `source`: Date / time value from which to extract the specified field. + +This table shows the supported input and corresponding return types for the `EXTRACT()` function: + +[width="100%",cols="24%,55%,21%",options="header",] +|=== +|Input Type: `source` |Supported `field` values |Return Type +|`TIMESTAMP` |`YEAR`, `MONTH`, `DAY`, `HOUR`, `MINUTE`, `SECOND` |`DOUBLE PRECISION` +|`TIMESTAMPTZ` |`YEAR`, `MONTH`, `DAY`, `HOUR`, `MINUTE`, `SECOND` |`DOUBLE PRECISION` +|`DATE` |`YEAR`, `MONTH`, `DAY` |`INTEGER` +|=== + +[NOTE] +==== +The `SECOND` field returns a `DOUBLE PRECISION` value that includes fractional seconds, not an integer. +==== + +== Examples + +=== EXTRACT() with timestamp - year + +This example shows how to use the `EXTRACT()` function to extract a given timestamp's year: + +[source,sql] +---- +SELECT EXTRACT(YEAR FROM TIMESTAMP '2025-12-31 13:30:15.123456'); +---- + +The query returns: + +[source,sql] +---- ++----------+ +| extract | ++----------+ +| 2025 | ++----------+ +---- + +=== EXTRACT() with timestamp - month + +This example uses the `EXTRACT()` function to extract a given timestamp's month: + +[source,sql] +---- +SELECT EXTRACT(MONTH FROM TIMESTAMP '2025-12-31 13:30:15.123456'); +---- + +The query returns the month part of the given timestamp: + +[source,sql] +---- ++----------+ +| extract | ++----------+ +| 12 | ++----------+ +---- + +=== EXTRACT() with timestamp - seconds (including fractional seconds) + +This example uses the `EXTRACT()` function to extract a given timestamp's seconds, including fractional seconds: + +[source,sql] +---- +SELECT EXTRACT(SECOND FROM TIMESTAMP '2025-12-31 13:30:15.123456'); +---- + +The query returns the seconds part of the given timestamp: + +[source,sql] +---- ++----------+ +| extract | ++----------+ +| 15.123456|
++----------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/format-timestamp.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/format-timestamp.adoc new file mode 100644 index 000000000..ea033e15f --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/format-timestamp.adoc @@ -0,0 +1,92 @@ += FORMAT_TIMESTAMP +:description: The FORMAT_TIMESTAMP() function returns a given timestamp value in a specified format. +:page-topic-type: reference + +The `FORMAT_TIMESTAMP()` function returns a given timestamp value in a specified format. Its syntax is: + +[source,sql] +---- +FORMAT_TIMESTAMP(timestamp, format_string) +---- + +This function requires two arguments: + +* `timestamp`: A string representing the timestamp value that needs to be converted to a specified format. +* `format_string`: A string specifying the format to be converted into. + +Its return type is a timestamp value with a timezone. + +== Basic `FORMAT_TIMESTAMP()` function + +This example shows how to use the `FORMAT_TIMESTAMP()` function to convert a given timestamp into a timestamp format as specified in the function arguments. + +[source,sql] +---- +SELECT FORMAT_TIMESTAMP('2022-05-30 5:30:04', 'YYYY-MM-DD HH:MI:SS'); +---- + +Details of the format specified: + +* `YYYY` is the four-digit year: 2022 +* `MM` is the month: 05 +* `DD` is the day: 30 +* `HH` is the hour: 5 +* `MI` is the minute: 30 +* `SS` is the second: 04 + +[NOTE] +==== +The format specified in the string can be used in any combination. +==== +The query returns: + +[source,sql] +---- ++-----------------------------+ +| format_timestamp | ++-----------------------------+ +| 2022-05-30 05:30:04+05 | ++-----------------------------+ +---- + +== `FORMAT_TIMESTAMP()` function using multiple spaces + +This example shows how the `FORMAT_TIMESTAMP()` function handles multiple spaces in the input string.
When given multiple spaces, it omits the spaces and only returns the correct timestamp value: + +[source,sql] +---- +SELECT FORMAT_TIMESTAMP('2008 Dec','YYYY MON'); +---- + +This returns the output: + +[source,sql] +---- ++-----------------------------+ +| format_timestamp | ++-----------------------------+ +| 2008-12-01 00:00:00+05 | ++-----------------------------+ +---- + +== `FORMAT_TIMESTAMP()` function if the input value of the year is less than 4 digits + +`FORMAT_TIMESTAMP()` will adjust the year to the nearest year value if the input argument has fewer than the required number of digits (for example, fewer than 4). This example shows how it works: + +[source,sql] +---- +SELECT FORMAT_TIMESTAMP('07 25 09 10:40', 'MM DD YY HH:MI'); +---- + +This returns the output: + +[source,sql] +---- ++-----------------------------+ +| format_timestamp | ++-----------------------------+ +| 2009-07-25 10:40:00+06 | ++-----------------------------+ +---- + +In this example, the two-digit year `09` has been changed to the nearest four-digit year, `2009`. Similarly, `70` will become `1970`, and `10` will become `2010`. diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/index.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/index.adoc new file mode 100644 index 000000000..87723a73b --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/index.adoc @@ -0,0 +1,22 @@ += Overview +:description: Timestamp functions return a date-time value based on a specified timestamp/interval. + +Timestamp functions return a date-time value based on a specified timestamp/interval. Redpanda SQL supports these timestamp functions: + +[width="100%",cols="<38%,<62%",options="header",] +|=== +|*Functions* |*Description* +|xref:reference:sql/sql-functions/timestamp-functions/current-timestamp.adoc[CURRENT_TIMESTAMP()] |Returns the current date and time as a timestamp data type.
+|xref:reference:sql/sql-functions/timestamp-functions/format-timestamp.adoc[FORMAT_TIMESTAMP()] |Modifies the current timestamp into a different format. +|xref:reference:sql/sql-functions/timestamp-functions/unix-seconds.adoc[UNIX_SECONDS()] |Converts a given timestamp to a UNIX timestamp in seconds. +|xref:reference:sql/sql-functions/timestamp-functions/unix-millis.adoc[UNIX_MILLIS()] |Converts a given timestamp to a UNIX timestamp in milliseconds. +|xref:reference:sql/sql-functions/timestamp-functions/unix-micros.adoc[UNIX_MICROS()] |Converts a given timestamp to a UNIX timestamp in microseconds. +|xref:reference:sql/sql-functions/timestamp-functions/timestamp-seconds.adoc[TIMESTAMP_SECONDS()] |Converts a UNIX timestamp in seconds to a timestamp. +|xref:reference:sql/sql-functions/timestamp-functions/timestamp-millis.adoc[TIMESTAMP_MILLIS()] |Converts a UNIX timestamp in milliseconds to a timestamp. +|xref:reference:sql/sql-functions/timestamp-functions/timestamp-micros.adoc[TIMESTAMP_MICROS()] |Converts a UNIX timestamp in microseconds to a timestamp. +|xref:reference:sql/sql-functions/timestamp-functions/timestamp-trunc.adoc[TIMESTAMP_TRUNC()] |Truncates a given timestamp to the nearest time part. Supported time parts are YEAR, MONTH, DAY, HOUR, MINUTE, and SECOND +|xref:reference:sql/sql-functions/timestamp-functions/extract.adoc[EXTRACT()] |Extracts some part of a specified timestamp or interval. +|xref:reference:sql/sql-functions/timestamp-functions/to-timestamp.adoc[TO_TIMESTAMP()] |Converts a string into a timestamp based on the provided format. +|xref:reference:sql/sql-functions/timestamp-functions/date-trunc.adoc[DATE_TRUNC()] |Truncates intervals or timestamps/time zones to a specified field. +|xref:reference:sql/sql-functions/timestamp-functions/to-char.adoc[TO_CHAR() from Timestamp] |Formats a timestamp into a string using a given format. 
+|=== diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-micros.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-micros.adoc new file mode 100644 index 000000000..33863845b --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-micros.adoc @@ -0,0 +1,93 @@ += TIMESTAMP_MICROS +:description: The TIMESTAMP_MICROS() function converts a given UNIX timestamp value in microseconds since 1970-01-01 00:00:00 UTC into a timestamp. +:page-topic-type: reference + +The `TIMESTAMP_MICROS()` function converts a given UNIX timestamp value in microseconds since 1970-01-01 00:00:00 UTC into a timestamp. Its syntax is: + +[source,sql] +---- +SELECT TIMESTAMP_MICROS(BIGINT) +---- + +Its input type is a `BIGINT` expression representing a UNIX timestamp in microseconds and the return data type is a timestamp. + +== Examples + +=== Basic `TIMESTAMP_MICROS()` function + +This example shows how to use the `TIMESTAMP_MICROS()` function to convert a given UNIX timestamp in microseconds into a timestamp without a timezone: + +[source,sql] +---- +SELECT TIMESTAMP_MICROS(2280419000000000) AS timestamp_microsvalues; +---- + +The query returns: + +[source,sql] +---- ++-----------------------------+ +| timestamp_microsvalues | ++-----------------------------+ +| 2042-04-06 17:43:20 | ++-----------------------------+ +---- + +=== `TIMESTAMP_MICROS()` function using columns + +Suppose a table named **timemicro_example** has these UNIX time values in microseconds in the *unix_timestamp* column: + +[source,sql] +---- +CREATE TABLE timemicro_example ( + unix_timestamp long +); + +INSERT INTO timemicro_example VALUES +('1350417000000000'), +('2130215000000000'), +('1110115000000000'), +('2310112000000000'); +---- + +[source,sql] +---- +SELECT * FROM timemicro_example; +---- + +This query shows the table: + +[source,sql] +---- ++--------------------+ +| unix_timestamp | ++--------------------+ +| 
1350417000000000 | +| 2130215000000000 | +| 1110115000000000 | +| 2310112000000000 | ++--------------------+ +---- + +To convert all UNIX timestamp values in microseconds to timestamp values, run the query: + +[source,sql] +---- +SELECT unix_timestamp, TIMESTAMP_MICROS(unix_timestamp) +AS timestamp_value +FROM timemicro_example; +---- + +The output displays all the entries in the table in UNIX timestamp format (in microseconds) in the *unix_timestamp* column and in the timestamp format in the column *timestamp_value* without timezone: + +[source,sql] +---- ++-------------------------+-----------------------+ +| unix_timestamp | timestamp_value | ++-------------------------+-----------------------+ +|1350417000000000 | 2012-10-16 19:50:00 | +|2130215000000000 | 2037-07-03 06:23:20 | +|1110115000000000 | 2005-03-06 13:16:40 | +|2310112000000000 | 2043-03-16 09:46:40 | ++-------------------------+-----------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-millis.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-millis.adoc new file mode 100644 index 000000000..475e65c19 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-millis.adoc @@ -0,0 +1,90 @@ += TIMESTAMP_MILLIS +:description: The TIMESTAMP_MILLIS() function converts a given UNIX timestamp value in milliseconds since 1970-01-01 00:00:00 UTC into a timestamp. +:page-topic-type: reference + +The `TIMESTAMP_MILLIS()` function converts a given UNIX timestamp value in milliseconds since 1970-01-01 00:00:00 UTC into a timestamp. Its syntax is: + +[source,sql] +---- +SELECT TIMESTAMP_MILLIS(BIGINT) +---- + +Its input type is a `BIGINT` expression which represents a UNIX timestamp in milliseconds and the return data type is a timestamp. 
+ +== Examples + +=== Basic `TIMESTAMP_MILLIS()` function + +This example shows how to use the `TIMESTAMP_MILLIS()` function to convert a given UNIX timestamp in milliseconds into a timestamp without a timezone. + +[source,sql] +---- +SELECT TIMESTAMP_MILLIS(1671975000000) AS timestamp_millisvalues; +---- + +The query returns: + +[source,sql] +---- ++-----------------------------+ +| timestamp_millisvalues | ++-----------------------------+ +| 2022-12-25 13:30:00 | ++-----------------------------+ +---- + +=== `TIMESTAMP_MILLIS()` function using columns + +Suppose a table named **unix_example** has these UNIX time values in milliseconds in the *unix_timestamp* column: + +[source,sql] +---- +CREATE TABLE unix_example ( + unix_timestamp long +); + +INSERT INTO unix_example VALUES +('171472000000'), +('1671975000000'), +('153276000000'); +---- + +[source,sql] +---- +SELECT * FROM unix_example; +---- + +This query shows the table: + +[source,sql] +---- ++----------------+ +| unix_timestamp | ++----------------+ +| 171472000000 | +| 1671975000000 | +| 153276000000 | ++----------------+ +---- + +To convert all UNIX timestamp values in milliseconds to timestamp values, run the query: + +[source,sql] +---- +SELECT unix_timestamp, TIMESTAMP_MILLIS(unix_timestamp) +AS timestamp_value +FROM unix_example; +---- + +The output displays all the entries in the table in UNIX timestamp format (in milliseconds) in the **unix_timestamp** column and in timestamp format, without a timezone, in the **timestamp_value** column.
+ +[source,sql] +---- ++-------------------------+-----------------------+ +| unix_timestamp | timestamp_value | ++-------------------------+-----------------------+ +|171472000000 | 1975-06-08 15:06:40 | +|1671975000000 | 2022-12-25 13:30:00 | +|153276000000 | 1974-11-10 00:40:00 | ++-------------------------+-----------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-seconds.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-seconds.adoc new file mode 100644 index 000000000..be2a01ae2 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-seconds.adoc @@ -0,0 +1,90 @@ += TIMESTAMP_SECONDS +:description: The TIMESTAMP_SECONDS() function converts a given UNIX timestamp value in seconds from 1970-01-01 00:00:00 UTC into a timestamp. +:page-topic-type: reference + +The `TIMESTAMP_SECONDS()` function converts a given UNIX timestamp value in seconds from 1970-01-01 00:00:00 UTC into a timestamp. Its syntax is: + +[source,sql] +---- +SELECT TIMESTAMP_SECONDS(Int64) +---- + +Its input type is an `int64` expression representing a UNIX timestamp in seconds, and the return data type is a timestamp. 
+ +== Examples + +=== Basic `TIMESTAMP_SECONDS()` function + +This example shows how to use the `TIMESTAMP_SECONDS()` function to convert a given UNIX timestamp in seconds into a timestamp: + +[source,sql] +---- +SELECT TIMESTAMP_SECONDS(1671975000) AS timestamp_secondsvalue; +---- + +The query returns: + +[source,sql] +---- ++-----------------------------+ +| timestamp_secondsvalue | ++-----------------------------+ +| 2022-12-25 13:30:00 | ++-----------------------------+ +---- + +=== `TIMESTAMP_SECONDS()` function using columns + +Suppose a table named **unix_time** contains these UNIX time values in seconds: + +[source,sql] +---- +CREATE TABLE unix_time ( + unix_time int +); + +INSERT INTO unix_time VALUES +('982384720'), +('1671975000'), +('171472000'); +---- + +[source,sql] +---- +SELECT * FROM unix_time; +---- + +The query shows the table: + +[source,sql] +---- ++-------------+ +| unix_time | ++-------------+ +| 982384720 | +| 1671975000 | +| 171472000 | ++-------------+ +---- + +To convert all UNIX timestamp values in seconds to timestamp values, run the query: + +[source,sql] +---- +SELECT unix_time, TIMESTAMP_SECONDS(unix_time) +AS timestamp_value +FROM unix_time ; +---- + +The output displays all the entries in the table in UNIX timestamp format (in seconds) in the *unix_time* column, and in the timestamp format without timezone in the column *timestamp_value*. 
+ +[source,sql] +---- ++-------------------------+-----------------------+ +| unix_time | timestamp_value | ++-------------------------+-----------------------+ +| 982384720 | 2001-02-17 04:38:40 | +| 1671975000 | 2022-12-25 13:30:00 | +| 171472000 | 1975-06-08 15:06:40 | ++-------------------------+-----------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-trunc.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-trunc.adoc new file mode 100644 index 000000000..8021f6f8e --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-trunc.adoc @@ -0,0 +1,83 @@ += TIMESTAMP_TRUNC +:description: The TIMESTAMP_TRUNC() function truncates a timestamp to a specific day_time granularity. +:page-topic-type: reference + +The `TIMESTAMP_TRUNC()` function truncates a timestamp to a specific `day_time` granularity. + +== Syntax + +[source,sql] +---- +SELECT TIMESTAMP_TRUNC(TIMESTAMP 'YYYY-MM-DD hour:min:sec', day_time); +---- + +`day_time` accepts these time values: + +* `SECOND` +* `MINUTE` +* `HOUR` +* `DAY` +* `MONTH` +* `YEAR` + +== Examples + +=== `TIMESTAMP_TRUNC()` - hour + +This example shows how to truncate a timestamp to the hour: + +[source,sql] +---- +SELECT TIMESTAMP_TRUNC(TIMESTAMP '2017-09-18 14:43:39.02322', HOUR) ; +---- + +The result is the timestamp truncated to the start of the hour, with the minutes and seconds set to zero: + +[source,sql] +---- ++-----------------------------+ +| f | ++-----------------------------+ +| 2017-09-18 14:00:00.00000 | ++-----------------------------+ +---- + +=== `TIMESTAMP_TRUNC()` - minute + +This example truncates the specified timestamp to the minute: + +[source,sql] +---- +SELECT TIMESTAMP_TRUNC(TIMESTAMP '2005-03-18 14:13:13', MINUTE) ; +---- + +The result is the truncated timestamp: + +[source,sql] +----
++-----------------------------+ +| f | ++-----------------------------+ +| 2005-03-18 14:13:00.00000 | ++-----------------------------+ +---- + +=== Basic `TIMESTAMP_TRUNC()` function - year + +Run this query to round the date to the closest value: + +[source,sql] +---- +SELECT TIMESTAMP_TRUNC(TIMESTAMP '2023-03-04', YEAR); +---- + +The function will truncate the year and returns: + +[source,sql] +---- ++-----------------------------+ +| f | ++-----------------------------+ +| 2023-01-01 00:00:00.00000 | ++-----------------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/to-char.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/to-char.adoc new file mode 100644 index 000000000..289ef28d8 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/to-char.adoc @@ -0,0 +1,165 @@ += TO_CHAR +:description: The TO_CHAR function formats various data types, including date/time, integer, float point and numeric into a formatted string. +:page-topic-type: reference + +The `TO_CHAR` function formats various data types, including `date/time`, `integer`, `float point` and `numeric` into a formatted string. + +== Syntax + +The syntax for this function is: + +.Timestamp +[source,sql] +---- +TO_CHAR(timestamp, format_string) +---- + +.Interval +[source,sql] +---- +TO_CHAR(interval, format_string) +---- + +== Arguments + +* `timestamp`: `TIMESTAMP` or `TIMESTAMP WITH TIMEZONE` value to format. +* `format`: Format of the output string. + +NOTE: If the format string is `NULL`, `TO_CHAR` returns an empty string (`''`) instead of `NULL`. This behavior is compatible with PostgreSQL. 
+ +== Supported formats + +The string format supports these template patterns (case-insensitive): + +[width="100%",cols="49%,51%",options="header",] +|=== +|*Pattern* |*Description* +|`YYYY` |Year (1-9999) +|`MM` |Month number (01–12) +|`DD` |Day of month (01–31) +|`HH` |Hour of day (1–12) +|`HH12` |Hour of day (1–12) +|`HH24` |Hour of day (0–23) +|`MI` |Minute (0–59) +|`SS` |Second (0–59) +|`MS` |Millisecond (0–999) +|`US` |Microsecond (0–999999) +|`AM`, `am`, `PM` or `pm` |Meridiem indicator without periods +|`A.M.`, `a.m.`, `P.M.` or `p.m.` |Meridiem indicator with periods +|=== + +=== General restrictions + +* All text inside double quote `"\{text}"` will not be considered a pattern +* The quote character (`"`) will not appear in the result string +* Any text that is not a template pattern is copied verbatim (for example, preserved in the result string) + +=== Interval overflow restrictions + +Interval overflow occurs when an operation involving interval values exceeds the maximum limits of the interval data type, resulting in an error or unexpected behavior. This can happen when adding, subtracting or multiplying interval values that lead to a representation that goes beyond the allowable range for any of its components (for example, years, months, days, hours, minutes and seconds). When executing the `TO_CHAR` function for intervals, it is important to be aware of these overflow restrictions: + +[cols="<,^,^",options="header",] +|=== +|Conversion |Source Component |Target Component +|Days to Months |Days |Months +|Hours to Days |Hours |Days +|Seconds to Days |Seconds |Days +|=== + +All in all, for intervals the date overflow doesn't apply (units smaller than an hour can only overflow into hours, but not into days and so on), any excess units will not carry over to the next larger unit. 
+ +== Examples + +=== Intervals + +This query converts an interval and displays it in a specified string format: + +.Month_to_Year +[source,sql] +---- +SELECT TO_CHAR('25 months'::INTERVAL,'"YEAR:" YYYY "MONTH:" MM') AS FORMATTED_INTERVAL; +---- + +.Hour_to_Day +[source,sql] +---- +SELECT TO_CHAR('13 days' + '49 hours'::INTERVAL, '"Day:" DD "Hour:" HH') AS FORMATTED_INTERVAL; +---- + +.Second_to_Minute +[source,sql] +---- +SELECT TO_CHAR('65 seconds'::INTERVAL, '"MINUTE": MI "SECOND": SS') AS FORMATTED_INTERVAL; +---- +Outputs: + +.Month_to_Year +[source,sql] +---- + FORMATTED_INTERVAL +--------------------------------------- +YEAR: 0002 MONTH: 01 +---- + +.Hour_to_Day +[source,sql] +---- + FORMATTED_INTERVAL +--------------------------------------- +Day: 13 Hour: 01 +---- + +.Second_to_Minute +[source,sql] +---- + FORMATTED_INTERVAL +--------------------------------------- +MINUTE: 01 SECOND: 05 +---- + +=== Timestamps + +This query retrieves the current timestamp and displays it in a specified string format: + +.Timestamp +[source,sql] +---- +SELECT TO_CHAR(CURRENT_TIMESTAMP(), '"YEAR:" YYYY "MONTH:" MM "DAY:" DD') AS FORMATTED_TIMESTAMP; +---- + +.Timestamp_with_Microseconds +[source,sql] +---- +SELECT TO_CHAR(CURRENT_TIMESTAMP(), 'YYYY-MM-DD HH24:MI:SS.US') AS FORMATTED_TIMESTAMP; +---- + +.Timestamp_with_Meridiem +[source,sql] +---- +SELECT TO_CHAR(CURRENT_TIMESTAMP(), 'YYYY-MM-DD HH12:MI:SS a.m.') AS FORMATTED_TIMESTAMP; +---- +Outputs: + +.Timestamp +[source,sql] +---- + FORMATTED_TIMESTAMP +--------------------------------------- +YEAR:2025 MONTH:01 DAY:01 +---- + +.Timestamp_with_Microseconds +[source,sql] +---- + FORMATTED_TIMESTAMP +--------------------------------------- +2025-01-01 08:08:03.001200 +---- + +.Timestamp_with_Meridiem +[source,sql] +---- + FORMATTED_TIMESTAMP +--------------------------------------- +2025-01-01 08:08:03 p.m. 
+---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/to-timestamp.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/to-timestamp.adoc new file mode 100644 index 000000000..0f0258e71 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/to-timestamp.adoc @@ -0,0 +1,197 @@ += TO_TIMESTAMP +:description: The TO_TIMESTAMP() function converts a string into a timestamp based on the provided format. +:page-topic-type: reference + +The `TO_TIMESTAMP()` function converts a string into a timestamp based on the provided format. It returns a `TIMESTAMP WITH TIME ZONE` type. + +== Syntax + +The syntax for using the `TO_TIMESTAMP()` function is: + +[source,sql] +---- +SELECT TO_TIMESTAMP('source', 'format'); +---- + +* `source`: The date/time value to convert. Type is `TIMESTAMP` (`YYYY-MM-DD HH:MM:SS`). +* `format`: The format of the input string. + +NOTE: If the source string is `NULL`, `TO_TIMESTAMP` returns `NULL` instead of raising an error. This behavior is compatible with PostgreSQL. + +== Format + +Format string support these template patterns (can be lowercase): + +[cols="1,2,3",options="header"] +|=== +|Pattern |Description |Detail + +|`YYYY` +|Year (1–9999) +a|- The lowest possible value is 1 AD. + + 0001 is 1. + + 1 is 1. + +|`MM` +|Month number (1–12) +a|- Up to 2 digits. + + 01 is 1. + + 1 is 1. + +|`DD` +|Day of month (1–31) +a|- Up to 2 digits. + + 01 is 1. + + 1 is 1. + +|`HH` +|Hour of day (1–12) +a|- Up to 2 digits. + + 01 is 1. + + 1 is 1. + +|`HH12` +|Hour of day (1–12) +a|- Up to 2 digits. + + 01 is 1. + + 1 is 1. + +|`HH24` +|Hour of day (0–23) +a|- Up to 2 digits. + + 01 is 1. + + 1 is 1. + +|`MI` +|Minute (0–59) +a|- Up to 2 digits. + + 01 is 1. + + 1 is 1. + +|`SS` +|Second (0–59) +a|- Up to 2 digits. + + 01 is 1. + + 1 is 1. + +|`MS` +|Millisecond (0–999) +a|- Up to 3 digits. + + 001 is 1 millisecond. + + 1 is 100 milliseconds. 
+ +|`US` +|Microsecond (0–999999) +a|- Up to 6 digits. + + 000001 is 1 microsecond. + + 1 is 100000 microseconds. + +|`AM`, `am`, `PM` or `pm` +|Meridiem indicator +|Without periods. + +|`A.M.`, `a.m.`, `P.M.` or `p.m.` +|Meridiem indicator +|With periods. +|=== + +== Examples + +=== Timestamp into YYYY-MM-DD HH24:MI + +The `TO_TIMESTAMP()` function converts the provided string into a timestamp with the format `YYYY-MM-DD HH24:MI`. + +[source,sql] +---- +select TO_TIMESTAMP('2020-03-04 14:30', 'YYYY-MM-DD HH24:MI'); +---- + +The output is a timestamp with a timezone. + +[source,sql] +---- + to_timestamp +------------------------------- + 2020-03-04 14:30:00.000000+00 +---- + +=== Timestamp into MM-DD HH12:MI + +The `TO_TIMESTAMP()` function converts the provided string into a timestamp with the format `MM-DD HH12:MI`. + +[source,sql] +---- +select TO_TIMESTAMP('3-04 02:30', 'MM-DD HH12:MI'); +---- + +The output is a timestamp with a timezone. + +[source,sql] +---- + to_timestamp +---------------------------- + 1-03-04 02:30:00.000000+00 +---- + +=== Timestamp into YYYY-MM HH12:MI(AM/PM) + +The `TO_TIMESTAMP()` function converts the provided string into a timestamp with the format `YYYY-MM HH12:MI` with meridiem indicator (AM/PM). + +*Request 1* + +[source,sql] +---- +select TO_TIMESTAMP('2020-02 12:30AM', 'YYYY-MM HH12:MIPM'); +---- + +*Request 2* + +[source,sql] +---- +select TO_TIMESTAMP('2020-02 12:30AM', 'YYYY-MM HH:MIAM'); +---- + +The output of both requests is the same. It changes the time into a 12-hour format, resulting in *12:30* being adjusted to *00:30*. + +[source,sql] +---- + to_timestamp +------------------------------- + 2020-02-01 00:30:00.000000+00 +---- + +=== Timestamp into YYYY-MM-DD HH24:MI:SS.MS.US + +The `TO_TIMESTAMP()` function converts the provided string into a timestamp with `YYYY-MM-DD HH24:MI:SS.MS.US` format. 
+ +[source,sql] +---- +select TO_TIMESTAMP('1960-01-31 15:12:02.020.001230', 'YYYY-MM-DD HH24:MI:SS.MS.US'); +---- + +The output is a timestamp with milliseconds and microseconds. + +[source,sql] +---- + to_timestamp +------------------------------- + 1960-01-31 15:12:02.021230+00 +---- + +=== Timestamp into YYYY-MM-DD HH24:MI:SS.MS + +The `TO_TIMESTAMP()` function converts the provided string into a timestamp with `YYYY-MM-DD HH24:MI:SS.MS` format. + +[source,sql] +---- +select TO_TIMESTAMP('1960-01-31 15:12:02.02', 'YYYY-MM-DD HH24:MI:SS.MS'); +---- + +The output is a timestamp with milliseconds. + +[source,sql] +---- + to_timestamp +------------------------------- + 1960-01-31 15:12:02.020000+00 +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-micros.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-micros.adoc new file mode 100644 index 000000000..97c0b13c7 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-micros.adoc @@ -0,0 +1,90 @@ += UNIX_MICROS +:description: The UNIX_MICROS() function converts a given timestamp into a UNIX timestamp in microseconds, from 1970-01-01 00:00:00-00 (can be negative). +:page-topic-type: reference + +The `UNIX_MICROS()` function converts a given timestamp into a UNIX timestamp in microseconds, from 1970-01-01 00:00:00-00 (can be negative): + +[source,sql] +---- +SELECT UNIX_MICROS(TIMESTAMP) +---- + +Its input type is a TIMESTAMP expression, and the return data type is `int64` representing time in microseconds.
+ +== Examples + +=== Basic `UNIX_MICROS()` function + +This example shows how to use the `UNIX_MICROS()` function to convert a given timestamp into a UNIX timestamp in microseconds: + +[source,sql] +---- +SELECT UNIX_MICROS(TIMESTAMP "2022-12-25 13:30:00+00") AS unix_microsvalues; +---- + +The query returns: + +[source,sql] +---- ++-----------------------------+ +| unix_microsvalues | ++-----------------------------+ +| 1671975000000000.000000 | ++-----------------------------+ +---- + +=== `UNIX_MICROS()` function using columns + +Suppose a table named *time_example* has these timestamp values in the *time_stamp* column: + +[source,sql] +---- +CREATE TABLE time_example ( + time_stamp timestamp +); + +INSERT INTO time_example VALUES +('2022-12-25 13:30:00'), +('2021-10-02 06:30:00'), +('2020-09-25 07:25:00'); +---- + +[source,sql] +---- +SELECT * FROM time_example; +---- + +This query shows the table: + +[source,sql] +---- ++-------------------------+ +| time_stamp | ++-------------------------+ +| 2022-12-25 13:30:00 | +| 2021-10-02 06:30:00 | +| 2020-09-25 07:25:00 | ++-------------------------+ +---- + +To convert all timestamp values into UNIX timestamp values in microseconds, run the query: + +[source,sql] +---- +SELECT time_stamp, UNIX_MICROS(time_stamp) +AS time_micros +FROM time_example; +---- + +The output displays all the timestamp entries in the *time_stamp* column and the converted UNIX timestamps in microseconds in the column *time_micros*.
+ +[source,sql] +---- ++-------------------------+--------------------------+ +| time_stamp | time_micros | ++-------------------------+--------------------------+ +| 2022-12-25 13:30:00 | 1671975000000000.000000 | +| 2021-10-02 06:30:00 | 1633156200000000.000000 | +| 2020-09-25 07:25:00 | 1601018700000000.000000 | ++-------------------------+--------------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-millis.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-millis.adoc new file mode 100644 index 000000000..cfc9205ef --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-millis.adoc @@ -0,0 +1,88 @@ += UNIX_MILLIS +:description: The UNIX_MILLIS() function returns a given timestamp to a UNIX timestamp in milliseconds from 1970-01-01 00:00:00-00 (can be negative). +:page-topic-type: reference + +The `UNIX_MILLIS()` function returns a given timestamp to a UNIX timestamp in milliseconds from 1970-01-01 00:00:00-00 (can be negative). Its syntax is: + +[source,sql] +---- +SELECT UNIX_MILLIS(TIMESTAMP) +---- + +Its input type is a TIMESTAMP expression, and the return data type is `BIGINT` representing time in milliseconds. 
+ +== Examples + +=== Basic `UNIX_MILLIS()` function + +This example shows how to use the `UNIX_MILLIS()` function to convert a given timestamp into a UNIX timestamp in milliseconds: + +[source,sql] +---- +SELECT UNIX_MILLIS(TIMESTAMP "1996-5-02 7:15:00+00") AS unix_millisvalues; +---- + +The query returns: + +[source,sql] +---- ++-----------------------------+ +| unix_millisvalues | ++-----------------------------+ +| 831021300000.000000 | ++-----------------------------+ +---- + +=== `UNIX_MILLIS()` function using columns + +Suppose a table named **time_example** has these timestamp values in the *time_stamp* column: + +[source,sql] +---- +CREATE TABLE time_example ( + time_stamp timestamp +); + +INSERT INTO time_example VALUES +('2004-07-23 11:30:00+00'), +('2011-02-12 04:45:00+00'), +('1975-08-03 07:50:00+00'); +---- + +[source,sql] +---- +SELECT * FROM time_example; +---- + +This query shows the table: + +[source,sql] +---- ++-------------------------+ +| time_example | ++-------------------------+ +| 2004-07-23 11:30:00 | +| 2011-02-12 04:45:00 | +| 1975-08-03 07:50:00 | ++-------------------------+ +---- + +To convert all timestamp values into UNIX timestamp values in milliseconds, run the query: + +[source,sql] +---- +SELECT time_stamp, UNIX_MILLIS(time_stamp) AS time_millis FROM time_example; +---- + +The output displays all the timestamp entries of the table in the **time_stamp** column and the converted UNIX milliseconds timestamp entries in the column *time_millis*. 
+ +[source,sql] +---- ++-------------------------+-----------------------+ +| time_stamp | time_millis | ++-------------------------+-----------------------+ +| 2004-07-23 11:30:00 | 1090582200000.000000 | +| 2011-02-12 04:45:00 | 1297485900000.000000 | +| 1975-08-03 07:50:00 | 176284200000.000000 | ++-------------------------+-----------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-seconds.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-seconds.adoc new file mode 100644 index 000000000..bfcaf6f3b --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-seconds.adoc @@ -0,0 +1,93 @@ += UNIX_SECONDS +:description: The UNIX_SECONDS() function converts a given timestamp to a UNIX timestamp in seconds, from 1970-01-01 00:00:00-00. +:page-topic-type: reference + +The `UNIX_SECONDS()` function converts a given timestamp to a UNIX timestamp in seconds, from 1970-01-01 00:00:00-00. Its syntax is: + +[source,sql] +---- +SELECT UNIX_SECONDS(TIMESTAMP) +---- + +Its input type is a TIMESTAMP expression, and the return data type is `BIGINT` representing time in seconds.
+ +== Examples + +=== Basic `UNIX_SECONDS()` function + +This example shows how to use the `UNIX_SECONDS()` function to convert a given timestamp into a UNIX timestamp in seconds: + +[source,sql] +---- +SELECT UNIX_SECONDS(TIMESTAMP '2008-12-25 15:30:00+00') AS unix_secondsvalues; +---- + +The query returns: + +[source,sql] +---- ++-----------------------------+ +| unix_secondsvalues | ++-----------------------------+ +| 1230219000.000000 | ++-----------------------------+ +---- + +=== `UNIX_SECONDS()` function using columns + +Suppose a table named **time_example** has these timestamp values in the *time_stampvalues* column: + +[source,sql] +---- +CREATE TABLE time_example ( + time_stampvalues timestamp +); + +INSERT INTO time_example VALUES +('2022-12-25 13:30:00'), +('2020-09-25 07:25:00'), +('2008-12-25 15:30:00'), +('2021-10-02 06:30:00'); +---- + +[source,sql] +---- +SELECT * FROM time_example; +---- + +The query returns the table: + +[source,sql] +---- ++-------------------------+ +| time_stampvalues | ++-------------------------+ +| 2022-12-25 13:30:00 | +| 2020-09-25 07:25:00 | +| 2008-12-25 15:30:00 | +| 2021-10-02 06:30:00 | ++-------------------------+ +---- + +. To convert all timestamp values into UNIX timestamp values in seconds, run the query: ++ +[source,sql] +---- +SELECT time_stampvalues, UNIX_SECONDS(time_stampvalues) +AS time_secondsvalues +FROM time_example; +---- + +. The output displays all the timestamp entries of the table in the *time_stampvalues* column and the converted UNIX seconds timestamp entries in the column *time_secondsvalues*.
++ +[source,sql] +---- ++-------------------------+-----------------------+ +| time_stampvalues | time_secondsvalues | ++-------------------------+-----------------------+ +| 2022-12-25 13:30:00 | 1671975000.000000 | +| 2020-09-25 07:25:00 | 1601018700.000000 | +| 2008-12-25 15:30:00 | 1230219000.000000 | +| 2021-10-02 06:30:00 | 1633156200.000000 | ++-------------------------+-----------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/avg.adoc b/modules/reference/pages/sql/sql-functions/window-functions/avg.adoc new file mode 100644 index 000000000..d58e2fc13 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/avg.adoc @@ -0,0 +1,140 @@ += AVG() +:description: The AVG() window function calculates the average (arithmetic mean) of a set of numeric values within a window. +:page-topic-type: reference + +The `AVG()` window function calculates the average (arithmetic mean) of a set of numeric values within a window. This function computes averages over a set of rows that are related to the current row, such as rows within a partition or ordered set. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +AVG(expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression + [ROWS | RANGE frame_specification] +) +---- + +== Parameters + +* `expression`: Column or expression that the function operates on (must be of numeric type). +* `ROWS or RANGE`: Optional. Specifies which rows to include in the calculation relative to the current row. + +== Examples + +The examples here use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website.
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +=== Rolling average by rating + +This query uses the `AVG()` function to calculate the rolling average of `length` as rows are ordered by `rating`: + +[source,sql] +---- +SELECT + rating, + length, + AVG(length) OVER (ORDER BY rating) AS RollingAverageLength +FROM film +WHERE length IS NOT NULL +ORDER BY rating; +---- + +This query produces the output: + +[source,sql] +---- + rating | length | rollingaveragelength +--------+--------+---------------------- + 3 | 77 | 85.33333333333333 + 3 | 125 | 85.33333333333333 + 3 | 54 | 85.33333333333333 + 4 | 121 | 103.33333333333333 + 4 | 106 | 103.33333333333333 + 4 | 137 | 103.33333333333333 + 5 | 83 | 103.5 + 5 | 142 | 103.5 + 5 | 47 | 103.5 + 5 | 143 | 103.5 + 7 | 157 | 122.70588235294117 + 7 | 179 | 122.70588235294117 + 7 | 176 | 122.70588235294117 + 7 | 131 | 122.70588235294117 + 7 | 136 | 122.70588235294117 + 7 | 122 | 122.70588235294117 + 7 | 150 | 122.70588235294117 +(17 rows) +---- + +=== Time series: rolling average length over last 3 ratings + +This example demonstrates a time series-style rolling average using a window frame of the current row and the two preceding rows, ordered by rating. 
This simulates a moving average over a sliding window of 3 rows: + +[source,sql] +---- +SELECT + rating, + length, + AVG(length) OVER ( + ORDER BY rating + ROWS BETWEEN 2 PRECEDING AND CURRENT ROW + ) AS rolling_avg_length_3 +FROM film +WHERE length IS NOT NULL +ORDER BY rating; +---- + +This query calculates the average length over the current rating and the two previous ratings (based on ordering by rating) smoothing the fluctuations by averaging over a fixed-size window: + +[source,sql] +---- + rating | length | rolling_avg_length_3 +--------+--------+---------------------- + 3 | 77 | 65.5 + 3 | 125 | 85.33333333333333 + 3 | 54 | 54 + 4 | 121 | 107.66666666666667 + 4 | 106 | 117.33333333333333 + 4 | 137 | 121.33333333333333 + 5 | 83 | 91 + 5 | 142 | 90.66666666666667 + 5 | 47 | 109 + 5 | 143 | 128.66666666666666 + 7 | 157 | 127.33333333333333 + 7 | 179 | 159.33333333333334 + 7 | 176 | 170.66666666666666 + 7 | 131 | 162 + 7 | 136 | 147.66666666666666 + 7 | 122 | 129.66666666666666 + 7 | 150 | 136 +(17 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/bool-and.adoc b/modules/reference/pages/sql/sql-functions/window-functions/bool-and.adoc new file mode 100644 index 000000000..d1ad4a937 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/bool-and.adoc @@ -0,0 +1,92 @@ += BOOL_AND() +:description: The BOOL_AND() window function evaluates whether all values within a specified window of rows are TRUE. +:page-topic-type: reference + +The `BOOL_AND()` window function evaluates whether all values within a specified window of rows are `TRUE`. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +BOOL_AND (expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression +) +---- + +== Parameters + +* `expression`: Column or expression that the function operates on. It should evaluate to a boolean value (`TRUE` or `FALSE`). 
+ +== Examples + +This example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +This query uses the `BOOL_AND()` function to evaluate if all films in each rating category have a length greater than 100: + +[source,sql] +---- +SELECT + title, + length, + rating, + BOOL_AND(length > 100) OVER (PARTITION BY rating) as ALLlongFilmsByRating +FROM film +ORDER BY rating; +---- + +The query returns: + +[source,sql] +---- + title | length | rating | alllongfilmsbyrating +---------------------+--------+--------+---------------------- + KILL BROTHERHOOD | 54 | G | f + PICKUP DRIVING | 77 | G | f + SAINTS BRIDE | 125 | G | f + CHRISTMAS MOONSHINE | 150 | NC-17 | t + HOURS RAGE | 122 | NC-17 | t + PIANIST OUTFIELD | 136 | NC-17 | t + INDEPENDENCE HOTEL | 157 | NC-17 | t + FOREVER CANDIDATE | 131 | NC-17 | t + WRATH MILE | 176 | NC-17 | t + YOUTH KICK | 179 | NC-17 | t + DANGEROUS UPTOWN | 121 | PG | t + PRIVATE DROP | 106 | PG | t + SLEEPY JAPANESE | 137 | PG | t + 
ATTRACTION NEWTON | 83 | PG-13 | f + HALLOWEEN NUTS | 47 | PG-13 | f + MILLION ACE | 142 | PG-13 | f + CLOCKWORK PARADISE | 143 | PG-13 | f +(17 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/bool-or.adoc b/modules/reference/pages/sql/sql-functions/window-functions/bool-or.adoc new file mode 100644 index 000000000..2deee927c --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/bool-or.adoc @@ -0,0 +1,92 @@ += BOOL_OR() +:description: The BOOL_OR() window function evaluates whether at least one value within a specified window of rows is TRUE. +:page-topic-type: reference + +The `BOOL_OR()` window function evaluates whether at least one value within a specified window of rows is `TRUE`. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +BOOL_OR (expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression +) +---- + +== Parameters + +* `expression`: Column or expression that the function operates on. It should evaluate to a boolean value (`TRUE` or `FALSE`). + +== Examples + +This example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +This query uses the `BOOL_OR()` function to evaluate whether at least one film in each rating category has a length greater than 150: + +[source,sql] +---- +SELECT + title, + length, + rating, + BOOL_OR(length > 150) OVER (PARTITION BY rating) as ALLleastOneLongFilmsByRating +FROM film +ORDER BY rating; +---- + +The query returns: + +[source,sql] +---- + title | length | rating | allleastonelongfilmsbyrating +---------------------+--------+--------+------------------------------ + KILL BROTHERHOOD | 54 | G | f + PICKUP DRIVING | 77 | G | f + SAINTS BRIDE | 125 | G | f + CHRISTMAS MOONSHINE | 150 | NC-17 | t + HOURS RAGE | 122 | NC-17 | t + PIANIST OUTFIELD | 136 | NC-17 | t + INDEPENDENCE HOTEL | 157 | NC-17 | t + FOREVER CANDIDATE | 131 | NC-17 | t + WRATH MILE | 176 | NC-17 | t + YOUTH KICK | 179 | NC-17 | t + DANGEROUS UPTOWN | 121 | PG | f + PRIVATE DROP | 106 | PG | f + SLEEPY JAPANESE | 137 | PG | f + ATTRACTION NEWTON | 83 | PG-13 | f + HALLOWEEN NUTS | 47 | PG-13 | f + MILLION ACE | 142 | PG-13 | f + CLOCKWORK PARADISE | 143 | PG-13 | f +(17 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/count.adoc
b/modules/reference/pages/sql/sql-functions/window-functions/count.adoc new file mode 100644 index 000000000..d4c0b9747 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/count.adoc @@ -0,0 +1,156 @@ += COUNT() +:description: The COUNT() window function retrieves the number of records that meet specific criteria. +:page-topic-type: reference + +The `COUNT()` window function retrieves the number of records that meet specific criteria. When using it with the `RANGE` clause, it performs counts within a defined range based on the values of the current row. This function can be used with all xref:reference:sql/sql-data-types/index.adoc[data types supported by Redpanda SQL]. + +== Syntax + +There are two available variants of this function: + +* `COUNT(*)`: Counts all rows in the target table, regardless of whether they contain NULL values or not. +* `COUNT(expression)`: Counts the number of non-NULL values in a specific column or expression. + +The syntax for this function is: + +[source,sql] +---- +COUNT(expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression + [ROWS | RANGE BETWEEN start_value AND end_value] +) +---- + +The `COUNT()` window function always returns `BIGINT` as an output, which represents the total number of rows in a table irrespective of the input types. + +== Parameters + +* `expression`: Column or expression. +* `PARTITION BY`: Optional. Divides the result set into partitions, each processed independently. If omitted, the entire result set is treated as a single partition. +* `ROWS | RANGE BETWEEN`: Range-based window frame relative to the current row.
+ +== Examples + +The following examples use a `winsales` table that stores the details of some sales transactions: + +[source,sql] +---- +CREATE TABLE winsales( + salesid int, + dateid date, + sellerid int, + buyerid text, + qty int, + qty_shipped int); +INSERT INTO winsales VALUES + (30001, '8/2/2003', 3, 'b', 10, 10), + (10001, '12/24/2003', 1, 'c', 10, 10), + (10005, '12/24/2003', 1, 'a', 30, null), + (40001, '1/9/2004', 4, 'a', 40, null), + (10006, '1/18/2004', 1, 'c', 10, null), + (20001, '2/12/2004', 2, 'b', 20, 20), + (40005, '2/12/2004', 4, 'a', 10, 10), + (20002, '2/16/2004', 2, 'c', 20, 20), + (30003, '4/18/2004', 3, 'b', 15, null), + (30004, '4/18/2004', 3, 'b', 20, null), + (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== COUNT(*) + +This example executes the variant of this function that counts all rows in the target table: + +[source,sql] +---- +SELECT salesid, qty, + COUNT(*) OVER (ORDER BY salesid rows unbounded preceding) AS count +FROM winsales +ORDER BY salesid; +---- + +The output displays the sales ID, quantity and the count of all rows from the start of the data window: + +[source,sql] +---- + salesid | qty | count +---------+-----+------- + 10001 | 10 | 1 + 10005 | 30 | 2 + 10006 | 10 | 3 + 20001 | 20 | 4 + 20002 | 20 | 5 + 30001 | 10 | 6 + 30003 | 15 | 7 + 30004 | 20 | 8 + 30007 | 30 | 9 + 40001 | 40 | 10 + 40005 | 10 | 11 +(11 rows) +---- + +=== COUNT(expression) + +This example executes the variant of this function that counts the number of non-NULL values in a specific expression: + +[source,sql] +---- +SELECT salesid, qty, qty_shipped, + COUNT(qty_shipped) OVER (ORDER BY salesid rows unbounded preceding) AS count +FROM winsales +ORDER BY salesid; +---- + +The query returns: + +[source,sql] +---- + salesid | qty | qty_shipped | count +---------+-----+-------------+------- + 10001 | 10 | 10 | 1 + 10005 | 30 | | 1 + 10006 | 10 | | 1 + 20001 | 20 | 20 | 2 + 20002 | 20 | 20 | 3 + 30001 | 10 | 10 | 4 + 30003 | 15 | | 4 + 30004 | 20 | | 4 +
30007 | 30 | | 4 + 40001 | 40 | | 4 + 40005 | 10 | 10 | 5 +(11 rows) +---- + +=== Time series: COUNT(*) with RANGE for last 90 days + +This example demonstrates counting the number of sales within a 90-day window prior to each sale, based on `dateid`: + +[source,sql] +---- +SELECT salesid, dateid, qty, + COUNT(*) OVER ( + ORDER BY dateid + RANGE BETWEEN INTERVAL '90 days' PRECEDING AND CURRENT ROW + ) AS sales_count_90d +FROM winsales +ORDER BY dateid; +---- + +This query counts the number of sales transactions within a 90-day window before each `dateid`, including the current sale: + +[source,sql] +---- + salesid | dateid | qty | sales_count_90d +---------+------------+-----+----------------- + 30001 | 2003-08-02 | 10 | 1 + 10001 | 2003-12-24 | 10 | 2 + 10005 | 2003-12-24 | 30 | 2 + 40001 | 2004-01-09 | 40 | 3 + 10006 | 2004-01-18 | 10 | 4 + 20001 | 2004-02-12 | 20 | 6 + 40005 | 2004-02-12 | 10 | 6 + 20002 | 2004-02-16 | 20 | 7 + 30003 | 2004-04-18 | 15 | 5 + 30004 | 2004-04-18 | 20 | 5 + 30007 | 2004-09-07 | 30 | 1 +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/cume-dist.adoc b/modules/reference/pages/sql/sql-functions/window-functions/cume-dist.adoc new file mode 100644 index 000000000..d4d3a08fa --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/cume-dist.adoc @@ -0,0 +1,90 @@ += CUME_DIST() +:description: The CUME_DIST() function is a window function used to calculate the cumulative distribution of a value within a set of values. +:page-topic-type: reference + +The `CUME_DIST()` function is a window function used to calculate the cumulative distribution of a value within a set of values. This function returns a value between 0 and 1, representing a relative position of a row within a partition or result set. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +CUME_DIST() OVER ( + [PARTITION BY partition_expression, ... ] + ORDER BY sort_expression [ASC | DESC], ... 
+) +---- + +== Parameters + +* `()`: This function does not take any arguments, but the parentheses are required. + +== Examples + +The following example uses a simplified version of the `film` table from the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila database^], containing only the `title`, `length` and `rating` columns. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); + +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The following query uses the `CUME_DIST()` function to calculate the cumulative distribution of film lengths: + +[source,sql] +---- +SELECT + title, + length, + CUME_DIST() OVER (ORDER BY length) AS cume_dist +FROM film; +---- + +The query returns: + +[source,sql] +---- + title | length | cume_dist +---------------------+--------+---------------------- + HALLOWEEN NUTS | 47 | 0.058823529411764705 + KILL BROTHERHOOD | 54 | 0.11764705882352941 + PICKUP DRIVING | 77 | 0.17647058823529413 + ATTRACTION NEWTON | 83 | 0.23529411764705882 + PRIVATE DROP | 106 | 0.29411764705882354 + DANGEROUS UPTOWN | 121 | 0.35294117647058826 + HOURS RAGE | 122 | 0.4117647058823529 + SAINTS BRIDE | 125 | 0.47058823529411764 + FOREVER CANDIDATE | 131 | 0.5294117647058824 + PIANIST OUTFIELD | 136 | 0.5882352941176471 + SLEEPY JAPANESE | 137 | 0.6470588235294118 +
MILLION ACE | 142 | 0.7058823529411765 + CLOCKWORK PARADISE | 143 | 0.7647058823529411 + CHRISTMAS MOONSHINE | 150 | 0.8235294117647058 + INDEPENDENCE HOTEL | 157 | 0.8823529411764706 + WRATH MILE | 176 | 0.9411764705882353 + YOUTH KICK | 179 | 1 +(17 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/dense-rank.adoc b/modules/reference/pages/sql/sql-functions/window-functions/dense-rank.adoc new file mode 100644 index 000000000..e51d53e12 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/dense-rank.adoc @@ -0,0 +1,122 @@ += DENSE_RANK() +:description: The DENSE_RANK() window function assigns a rank for each value within a specified group, based on the ORDER BY expression in the OVER clause. +:page-topic-type: reference + +The `DENSE_RANK()` window function assigns a rank for each value within a specified group, based on the `ORDER BY` expression in the `OVER` clause. Unlike the `RANK()` function, which can leave gaps in the ranking sequence when there are ties, `DENSE_RANK()` provides consecutive rank values without any gaps. This function can be used with all xref:reference:sql/sql-data-types/index.adoc[data types supported by Redpanda SQL]. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +DENSE_RANK() OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression +) +---- + +The output type for this function is a `BIGINT` and it indicates the rank of values in a table, regardless of the input types. If the `ORDER BY` expression is omitted, all ranks default to 1. If an optional `PARTITION BY` expression is included, the rankings are reset for each group of rows. The rows with equal values for the ranking criteria receive the same rank. 
+ +[NOTE] +==== +Unlike the `RANK()` function, there is no gap in the sequence of ranked values (if two rows are ranked 1, the next rank is 2). +==== + +== Parameters + +* `()`: This function does not take any arguments, but the parentheses are required. +* `PARTITION BY`: Optional. Divides the result set into partitions, each processed independently. If omitted, the entire result set is treated as a single partition. +* `ORDER BY`: Order of rows in each partition to which the function is applied. + +== Examples + +The following examples use a `winsales` table that stores information about some sales transactions: + +[source,sql] +---- +CREATE TABLE winsales( + salesid int, + dateid date, + sellerid int, + buyerid text, + qty int, + qty_shipped int); +INSERT INTO winsales VALUES + (30001, '8/2/2003', 3, 'b', 10, 10), + (10001, '12/24/2003', 1, 'c', 10, 10), + (10005, '12/24/2003', 1, 'a', 30, null), + (40001, '1/9/2004', 4, 'a', 40, null), + (10006, '1/18/2004', 1, 'c', 10, null), + (20001, '2/12/2004', 2, 'b', 20, 20), + (40005, '2/12/2004', 4, 'a', 10, 10), + (20002, '2/16/2004', 2, 'c', 20, 20), + (30003, '4/18/2004', 3, 'b', 15, null), + (30004, '4/18/2004', 3, 'b', 20, null), + (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== DENSE_RANK() with ORDER BY + +This example executes the `DENSE_RANK()` function with `ORDER BY` keyword and calculates the descending dense rank of all rows based on the quantity sold: + +[source,sql] +---- +SELECT salesid, qty, + DENSE_RANK() OVER (ORDER BY qty DESC) AS d_rnk, + RANK() OVER (ORDER BY qty DESC) AS rnk +FROM winsales +ORDER BY 2,1; +---- + +The output includes the sales ID along with the quantity sold and both dense and regular ranks: + +[source,sql] +---- + salesid | qty | d_rnk | rnk +---------+-----+-------+----- + 10001 | 10 | 5 | 8 + 10006 | 10 | 5 | 8 + 30001 | 10 | 5 | 8 + 40005 | 10 | 5 | 8 + 30003 | 15 | 4 | 7 + 20001 | 20 | 3 | 4 + 20002 | 20 | 3 | 4 + 30004 | 20 | 3 | 4 + 10005 | 30 | 2 | 2 + 30007 | 30 | 2 | 2 +
40001 | 40 | 1 | 1 +(11 rows) +---- + +=== DENSE_RANK() with ORDER BY and PARTITION BY + +This example executes the `DENSE_RANK()` function with `ORDER BY` keyword and `PARTITION BY` clause, partitions the table by seller ID, orders each partition by the quantity, and assigns a dense rank to each row: + +[source,sql] +---- +SELECT salesid, sellerid, qty, + DENSE_RANK() OVER (PARTITION BY sellerid ORDER BY qty DESC) AS d_rnk +FROM winsales +ORDER BY 2,3,1; +---- + +The query returns: + +[source,sql] +---- + salesid | sellerid | qty | d_rnk +---------+----------+-----+------- + 10001 | 1 | 10 | 2 + 10006 | 1 | 10 | 2 + 10005 | 1 | 30 | 1 + 20001 | 2 | 20 | 1 + 20002 | 2 | 20 | 1 + 30001 | 3 | 10 | 4 + 30003 | 3 | 15 | 3 + 30004 | 3 | 20 | 2 + 30007 | 3 | 30 | 1 + 40005 | 4 | 10 | 2 + 40001 | 4 | 40 | 1 +(11 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/first-value.adoc b/modules/reference/pages/sql/sql-functions/window-functions/first-value.adoc new file mode 100644 index 000000000..be5be6694 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/first-value.adoc @@ -0,0 +1,99 @@ += FIRST_VALUE() +:description: The FIRST_VALUE() is a window function that retrieves the first value in an ordered set of values within a specified partition. +:page-topic-type: reference + +The `FIRST_VALUE()` is a window function that retrieves the first value in an ordered set of values within a specified partition. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +FIRST_VALUE(expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression + RANGE BETWEEN start_value AND end_value +) +---- + +== Parameters + +* `expression`: Target column or expression. +* `PARTITION BY`: Optional. Divides the result set into partitions, each processed independently. If omitted, the entire result set is treated as a single partition.
+* `ORDER BY`: Order of rows in each partition to which the function is applied. +* `RANGE BETWEEN`: Range-based window frame relative to the current row. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The following query uses the `FIRST_VALUE()` function to retrieve the title of the film with the shortest duration, partitioning results by rating and ordering by length. 
+ +[source,sql] +---- +SELECT + title, + length, + rating, + FIRST_VALUE(title) OVER ( + PARTITION BY rating + ORDER BY + length ASC ROWS BETWEEN UNBOUNDED PRECEDING + AND UNBOUNDED FOLLOWING + ) AS shortest_film_in_rating +FROM film; +---- + +The query returns: + +[source,bash] +---- +| title | length | rating | shortest_film_in_rating | +|---------------------|------------|------------|-----------------------------| +| KILL BROTHERHOOD | 54 | G | KILL BROTHERHOOD | +| PICKUP DRIVING | 77 | G | KILL BROTHERHOOD | +| SAINTS BRIDE | 125 | G | KILL BROTHERHOOD | +| HOURS RAGE | 122 | NC-17 | HOURS RAGE | +| FOREVER CANDIDATE | 131 | NC-17 | HOURS RAGE | +| PIANIST OUTFIELD | 136 | NC-17 | HOURS RAGE | +| CHRISTMAS MOONSHINE | 150 | NC-17 | HOURS RAGE | +| INDEPENDENCE HOTEL | 157 | NC-17 | HOURS RAGE | +| WRATH MILE | 176 | NC-17 | HOURS RAGE | +| YOUTH KICK | 179 | NC-17 | HOURS RAGE | +| PRIVATE DROP | 106 | PG | PRIVATE DROP | +| DANGEROUS UPTOWN | 121 | PG | PRIVATE DROP | +| SLEEPY JAPANESE | 137 | PG | PRIVATE DROP | +| HALLOWEEN NUTS | 47 | PG-13 | HALLOWEEN NUTS | +| ATTRACTION NEWTON | 83 | PG-13 | HALLOWEEN NUTS | +| MILLION ACE | 142 | PG-13 | HALLOWEEN NUTS | +| CLOCKWORK PARADISE | 143 | PG-13 | HALLOWEEN NUTS | +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/index.adoc b/modules/reference/pages/sql/sql-functions/window-functions/index.adoc new file mode 100644 index 000000000..eb6e98879 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/index.adoc @@ -0,0 +1,67 @@ += Overview +:description: Window functions are a group of SQL functions that operate on a partition or "window" of a result set, returning values for every row within that window. + +Window functions are a group of SQL functions that operate on a partition or "`window`" of a result set, returning values for every row within that window.
Redpanda SQL supports the following window functions and clauses: + +== Window functions + +[width="100%",cols="40%,60%",options="header",] +|=== +|Function Name |Description +|xref:reference:sql/sql-functions/window-functions/count.adoc[COUNT] |Counts all the rows or those specified by the given expression +|xref:reference:sql/sql-functions/window-functions/avg.adoc[AVG] |Calculates the average (arithmetic mean) of a set of numeric values within a window +|xref:reference:sql/sql-functions/window-functions/sum.adoc[SUM] |Calculates and returns the sum of values from the input column or expression values +|xref:reference:sql/sql-functions/window-functions/min.adoc[MIN] |Computes the minimum value of an expression across a set of rows +|xref:reference:sql/sql-functions/window-functions/max.adoc[MAX] |Computes the maximum value of an expression across a set of rows +|xref:reference:sql/sql-functions/window-functions/bool-and.adoc[BOOL_AND] |Evaluates whether all values within a specified window of rows are true +|xref:reference:sql/sql-functions/window-functions/bool-or.adoc[BOOL_OR] |Evaluates whether at least one value within a specified window of rows is true +|=== + +== Ranking functions + +[width="100%",cols="<40%,<60%",options="header",] +|=== +|*Function Name* |*Description* +|xref:reference:sql/sql-functions/window-functions/row-number.adoc[ROW_NUMBER] |Returns the current row index within its partition (beginning with 1) +|xref:reference:sql/sql-functions/window-functions/rank.adoc[RANK] |Calculates and returns the rank of a value within a specified group of values +|xref:reference:sql/sql-functions/window-functions/dense-rank.adoc[DENSE_RANK] |Calculates the rank of a value within a specified group of values, with no gaps in the ranking sequence +|xref:reference:sql/sql-functions/window-functions/ntile.adoc[NTILE] |Divides an ordered data set into a specified number of approximately equal groups +|=== + +== Distribution functions + +[width="100%",cols="<40%,<60%",options="header",]
+|=== +|*Function Name* |*Description* +|xref:reference:sql/sql-functions/window-functions/cume-dist.adoc[CUME_DIST] |Calculates the cumulative distribution of a value within a set of values +|xref:reference:sql/sql-functions/window-functions/percent-rank.adoc[PERCENT_RANK] |Calculates and returns the percent rank of a value within a specified group of values +|=== + +== Value functions + +[width="100%",cols="<40%,<60%",options="header",] +|=== +|*Function Name* |*Description* +|xref:reference:sql/sql-functions/window-functions/first-value.adoc[FIRST_VALUE] |Returns the first value in an ordered set of values within a specified partition +|xref:reference:sql/sql-functions/window-functions/last-value.adoc[LAST_VALUE] |Returns the last value in an ordered set of values within a specified partition +|xref:reference:sql/sql-functions/window-functions/nth-value.adoc[NTH_VALUE] |Returns a value from the nth row in an ordered partition of a result set +|xref:reference:sql/sql-functions/window-functions/lag.adoc[LAG] |Returns the value from a row located at a defined offset before (above) the current row within the partition +|xref:reference:sql/sql-functions/window-functions/lead.adoc[LEAD] |Returns the value from a row located at a defined offset after (below) the current row within the partition +|=== + +== Window clause + +[width="100%",cols="<35%,<65%",options="header",] +|=== +|*Clause Name* |*Description* +|xref:reference:sql/sql-clauses/over-window.adoc[OVER] |Defines the window specification and is mandatory for window functions +|xref:reference:sql/sql-clauses/over-window.adoc[WINDOW] |Optional clause that defines one or more named window specifications +|=== + +== Important notes + +There are a few essential things to remember when using window functions in Redpanda SQL: + +* Verify that you can effectively use window functions alongside the `PARTITION BY`, `ORDER BY` and `FRAME` clauses as part of your window specification +* Ensure the window
specification chaining is supported by executing the following command: `SELECT SUM(i0) OVER w2 FROM tb1 WINDOW w1 AS (PARTITION BY i1), w2 AS (w1 ROWS CURRENT ROW)` +* The `FRAME` clause of the window specification is restricted to the `ROWS` clause and does not include frame exclusion diff --git a/modules/reference/pages/sql/sql-functions/window-functions/lag.adoc b/modules/reference/pages/sql/sql-functions/window-functions/lag.adoc new file mode 100644 index 000000000..6806cada7 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/lag.adoc @@ -0,0 +1,143 @@ += LAG() +:description: The LAG() window function returns the values from specific rows based on the offset argument (previous to the current row in the partition). +:page-topic-type: reference + +The `LAG()` window function returns the values from specific rows based on the offset argument (previous to the current row in the partition). It can be used with all xref:reference:sql/sql-data-types/index.adoc[data types supported by Redpanda SQL] + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +LAG (expression, offset, default) +OVER ( + [PARTITION BY partition_expression, ... ] + ORDER BY sort_expression [ASC | DESC], ... +) +---- + +The function returns a value of the same data type as the input. If no row meets the offset criteria, the function returns a default value, which must be of a type compatible with the expression. + +== Parameters + +* `expression`: Column to reference. +* `offset`: Optional. Number of rows behind the current row. Defaults to `1`. +* `default`: Optional. Value to return if the `offset` is out of range. Defaults to `NULL`. 
+ +== Examples + +The following examples use the `winsales` table that stores details about some sales transactions: + +[source,sql] +---- +CREATE TABLE winsales( + salesid int, + dateid date, + sellerid int, + buyerid text, + qty int, + qty_shipped int); +INSERT INTO winsales VALUES + (30001, '8/2/2003', 3, 'b', 10, 10), + (10001, '12/24/2003', 1, 'c', 10, 10), + (10005, '12/24/2003', 1, 'a', 30, null), + (40001, '1/9/2004', 4, 'a', 40, null), + (10006, '1/18/2004', 1, 'c', 10, null), + (20001, '2/12/2004', 2, 'b', 20, 20), + (40005, '2/12/2004', 4, 'a', 10, 10), + (20002, '2/16/2004', 2, 'c', 20, 20), + (30003, '4/18/2004', 3, 'b', 15, null), + (30004, '4/18/2004', 3, 'b', 20, null), + (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== LAG(expression, offset) + +This example executes the `LAG()` function with expression and offset parameters' values specified: + +[source,sql] +---- +SELECT buyerid, dateid, qty, + LAG(qty,1) OVER (ORDER BY buyerid, dateid) AS prev_qty +FROM winsales WHERE buyerid = 'c' +ORDER BY buyerid, dateid; +---- + +The query returns the buyer ID, date ID, quantity and previous quantity for all rows with buyer ID equal to `c`: + +[source,sql] +---- + buyerid | dateid | qty | prev_qty +---------+------------+-----+---------- + c | 2003-12-24 | 10 | + c | 2004-01-18 | 10 | 10 + c | 2004-02-16 | 20 | 10 + c | 2004-09-07 | 30 | 20 +(4 rows) +---- + +=== LAG(expression, offset, default) + +This example executes the `LAG()` function with expression, offset and default parameters' values specified: + +[source,sql] +---- +SELECT buyerid, dateid, qty, + LAG(buyerid,1,'unknown') OVER (ORDER BY dateid) AS prev_buyerid +FROM winsales +ORDER BY dateid; +---- + +The query returns the buyer ID, date ID, quantity and previous buyer ID for all rows: + +[source,sql] +---- + buyerid | dateid | qty | prev_buyerid +---------+------------+-----+-------------- + b | 2003-08-02 | 10 | unknown + c | 2003-12-24 | 10 | b + a | 2003-12-24 | 30 | c + a | 2004-01-09 | 40 |
a + c | 2004-01-18 | 10 | a + b | 2004-02-12 | 20 | c + a | 2004-02-12 | 10 | b + c | 2004-02-16 | 20 | a + b | 2004-04-18 | 15 | c + b | 2004-04-18 | 20 | b + c | 2004-09-07 | 30 | b +(11 rows) +---- + +=== Time series: LAG() to compare daily sales quantities + +This example uses LAG() to compare each day's sales quantity (`qty`) with the previous day's quantity, ordered by `dateid`: + +[source,sql] +---- +SELECT dateid, qty, + LAG(qty) OVER (ORDER BY dateid) AS prev_day_qty, + qty - LAG(qty) OVER (ORDER BY dateid) AS qty_change +FROM winsales +ORDER BY dateid; +---- + +The query returns: + +[source,sql] +---- + dateid | qty | prev_day_qty | qty_change +------------+-----+--------------+------------ + 2003-08-02 | 10 | | + 2003-12-24 | 10 | 10 | 0 + 2003-12-24 | 30 | 10 | 20 + 2004-01-09 | 40 | 30 | 10 + 2004-01-18 | 10 | 40 | -30 + 2004-02-12 | 20 | 10 | 10 + 2004-02-12 | 10 | 20 | -10 + 2004-02-16 | 20 | 10 | 10 + 2004-04-18 | 15 | 20 | -5 + 2004-04-18 | 20 | 15 | 5 + 2004-09-07 | 30 | 20 | 10 +(11 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/last-value.adoc b/modules/reference/pages/sql/sql-functions/window-functions/last-value.adoc new file mode 100644 index 000000000..4ac2e4999 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/last-value.adoc @@ -0,0 +1,98 @@ += LAST_VALUE() +:description: The LAST_VALUE() is a window function that retrieves the last value in an ordered set of values within a specified partition. +:page-topic-type: reference + +The `LAST_VALUE()` is a window function that retrieves the last value in an ordered set of values within a specified partition. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +LAST_VALUE(expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression + RANGE BETWEEN start_value AND end_value +) +---- + +== Parameters + +* `expression`: Column or expression that returns a single value. 
Represents the value to retrieve from the last row of the sorted partition. +* `PARTITION BY`: Optional. Divides the result set into partitions, each processed independently. If omitted, the entire result set is treated as a single partition. +* `ORDER BY`: Order of rows in each partition to which the function is applied. +* `RANGE BETWEEN`: Range-based window frame relative to the current row. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The following query uses the `LAST_VALUE()` function to retrieve the title of the film with the longest duration, partitioning results by rating and ordering by length.
+ +[source,sql] +---- +SELECT + title, + length, + rating, + LAST_VALUE(title) OVER ( + PARTITION BY rating + ORDER BY + length ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS longest_film_in_rating +FROM film; +---- + +The query returns: + +[source,bash] +---- +| title | length | rating | longest_film_in_rating | +|---------------------|--------|--------|------------------------| +| KILL BROTHERHOOD | 54 | G | SAINTS BRIDE | +| PICKUP DRIVING | 77 | G | SAINTS BRIDE | +| SAINTS BRIDE | 125 | G | SAINTS BRIDE | +| HOURS RAGE | 122 | NC-17 | YOUTH KICK | +| FOREVER CANDIDATE | 131 | NC-17 | YOUTH KICK | +| PIANIST OUTFIELD | 136 | NC-17 | YOUTH KICK | +| CHRISTMAS MOONSHINE | 150 | NC-17 | YOUTH KICK | +| INDEPENDENCE HOTEL | 157 | NC-17 | YOUTH KICK | +| WRATH MILE | 176 | NC-17 | YOUTH KICK | +| YOUTH KICK | 179 | NC-17 | YOUTH KICK | +| PRIVATE DROP | 106 | PG | SLEEPY JAPANESE | +| DANGEROUS UPTOWN | 121 | PG | SLEEPY JAPANESE | +| SLEEPY JAPANESE | 137 | PG | SLEEPY JAPANESE | +| HALLOWEEN NUTS | 47 | PG-13 | CLOCKWORK PARADISE | +| ATTRACTION NEWTON | 83 | PG-13 | CLOCKWORK PARADISE | +| MILLION ACE | 142 | PG-13 | CLOCKWORK PARADISE | +| CLOCKWORK PARADISE | 143 | PG-13 | CLOCKWORK PARADISE | +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/lead.adoc b/modules/reference/pages/sql/sql-functions/window-functions/lead.adoc new file mode 100644 index 000000000..c5c390680 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/lead.adoc @@ -0,0 +1,143 @@ += LEAD() +:description: The LEAD() window function takes a column and an integer offset as arguments and returns the value of the cell in that column that is located at the specified number of rows after the current row. +:page-topic-type: reference + +The `LEAD()` window function takes a column and an integer offset as arguments, and returns the value of the cell in the column located at the specified number of rows after the current row.
It can be used with all xref:reference:sql/sql-data-types/index.adoc[data types supported by Redpanda SQL]. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +LEAD (expression, offset, default) +OVER ( + [PARTITION BY partition_expression, ... ] + ORDER BY sort_expression [ASC | DESC], ... +) +---- + +The function returns a value of the same type as the input. If no row meets the offset criteria, the function returns the specified default value, which must be of a type compatible with the input. + +== Parameters + +* `expression`: Column to reference. +* `offset`: Optional. Number of rows ahead of the current row. Defaults to `1`. +* `default`: Optional. Value to return if the `offset` is out of range. Defaults to `NULL`. + +== Examples + +The following examples use the `winsales` table that stores details about some sales transactions: + +[source,sql] +---- +CREATE TABLE winsales( + salesid int, + dateid date, + sellerid int, + buyerid text, + qty int, + qty_shipped int); +INSERT INTO winsales VALUES + (30001, '8/2/2003', 3, 'b', 10, 10), + (10001, '12/24/2003', 1, 'c', 10, 10), + (10005, '12/24/2003', 1, 'a', 30, null), + (40001, '1/9/2004', 4, 'a', 40, null), + (10006, '1/18/2004', 1, 'c', 10, null), + (20001, '2/12/2004', 2, 'b', 20, 20), + (40005, '2/12/2004', 4, 'a', 10, 10), + (20002, '2/16/2004', 2, 'c', 20, 20), + (30003, '4/18/2004', 3, 'b', 15, null), + (30004, '4/18/2004', 3, 'b', 20, null), + (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== LEAD(expression, offset) + +This example executes the `LEAD()` function with expression and offset parameters' values specified: + +[source,sql] +---- +SELECT buyerid, dateid, qty, + LEAD(qty,1) OVER (ORDER BY buyerid, dateid) AS next_qty +FROM winsales WHERE buyerid = 'c' +ORDER BY buyerid, dateid; +---- + +This query returns the buyer ID, date ID, quantity and next quantity for all rows with buyer ID equal to `c`: + +[source,sql] +---- + buyerid | dateid | qty | next_qty
+---------+------------+-----+---------- + c | 2003-12-24 | 10 | 10 + c | 2004-01-18 | 10 | 20 + c | 2004-02-16 | 20 | 30 + c | 2004-09-07 | 30 | +(4 rows) +---- + +=== LEAD(expression, offset, default) + +This example executes the `LEAD()` function with expression, offset and default parameters' values specified: + +[source,sql] +---- +SELECT buyerid, dateid, qty, + LEAD(buyerid,1,'unknown') OVER (ORDER BY dateid) AS next_buyerid +FROM winsales +ORDER BY dateid; +---- + +The query returns the buyer ID, date ID, quantity and following buyer ID for all rows: + +[source,sql] +---- + buyerid | dateid | qty | next_buyerid +---------+------------+-----+-------------- + b | 2003-08-02 | 10 | c + c | 2003-12-24 | 10 | a + a | 2003-12-24 | 30 | a + a | 2004-01-09 | 40 | c + c | 2004-01-18 | 10 | b + b | 2004-02-12 | 20 | a + a | 2004-02-12 | 10 | c + c | 2004-02-16 | 20 | b + b | 2004-04-18 | 15 | b + b | 2004-04-18 | 20 | c + c | 2004-09-07 | 30 | unknown +(11 rows) +---- + +=== Time series: LEAD() to compare next day's sales quantity + +This example uses LEAD() to compare each day's sales quantity (`qty`) with the next day's quantity, ordered by `dateid`: + +[source,sql] +---- +SELECT dateid, qty, + LEAD(qty) OVER (ORDER BY dateid) AS next_day_qty, + LEAD(qty) OVER (ORDER BY dateid) - qty AS qty_change +FROM winsales +ORDER BY dateid; +---- + +The query returns: + +[source,sql] +---- + dateid | qty | next_day_qty | qty_change +------------+-----+--------------+------------ + 2003-08-02 | 10 | 10 | 0 + 2003-12-24 | 10 | 30 | 20 + 2003-12-24 | 30 | 40 | 10 + 2004-01-09 | 40 | 10 | -30 + 2004-01-18 | 10 | 20 | 10 + 2004-02-12 | 20 | 10 | -10 + 2004-02-12 | 10 | 20 | 10 + 2004-02-16 | 20 | 15 | -5 + 2004-04-18 | 15 | 20 | 5 + 2004-04-18 | 20 | 30 | 10 + 2004-09-07 | 30 | | +(11 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/max.adoc b/modules/reference/pages/sql/sql-functions/window-functions/max.adoc new file mode 100644 index
000000000..ce1027816 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/max.adoc @@ -0,0 +1,96 @@ += MAX() +:description: The MAX() window function computes the maximum value of an expression across a set of rows defined by a window specification. +:page-topic-type: reference + +The `MAX()` window function computes the maximum value of an expression across a set of rows defined by a window specification. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +MAX ([ALL] expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression + RANGE BETWEEN start_value AND end_value +) +---- + +== Parameters + +* `ALL`: Retains all duplicate values from the expression. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The query uses the `MAX()` function to find the maximum length of films for each rating category and also calculates a running maximum length as you move through the films ordered by length. The `RunningMaxLength` column updates as it encounters longer films. 
+ +[source,sql] +---- +SELECT + title, + length, + rating, + MAX(length) OVER ( PARTITION BY rating ) AS MaxLengthByRating, + MAX(length) OVER ( +ORDER BY + length ROWS BETWEEN unbounded preceding AND CURRENT ROW ) AS RunningMaxLength +FROM film +ORDER BY length; +---- + +The query returns: + +[source,sql] +---- + title | length | rating | maxlengthbyrating | runningmaxlength +---------------------+--------+--------+-------------------+------------------ + HALLOWEEN NUTS | 47 | PG-13 | 143 | 47 + KILL BROTHERHOOD | 54 | G | 125 | 54 + PICKUP DRIVING | 77 | G | 125 | 77 + ATTRACTION NEWTON | 83 | PG-13 | 143 | 83 + PRIVATE DROP | 106 | PG | 137 | 106 + DANGEROUS UPTOWN | 121 | PG | 137 | 121 + HOURS RAGE | 122 | NC-17 | 179 | 122 + SAINTS BRIDE | 125 | G | 125 | 125 + FOREVER CANDIDATE | 131 | NC-17 | 179 | 131 + PIANIST OUTFIELD | 136 | NC-17 | 179 | 136 + SLEEPY JAPANESE | 137 | PG | 137 | 137 + MILLION ACE | 142 | PG-13 | 143 | 142 + CLOCKWORK PARADISE | 143 | PG-13 | 143 | 143 + CHRISTMAS MOONSHINE | 150 | NC-17 | 179 | 150 + INDEPENDENCE HOTEL | 157 | NC-17 | 179 | 157 + WRATH MILE | 176 | NC-17 | 179 | 176 + YOUTH KICK | 179 | NC-17 | 179 | 179 +(17 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/min.adoc b/modules/reference/pages/sql/sql-functions/window-functions/min.adoc new file mode 100644 index 000000000..f00b81c64 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/min.adoc @@ -0,0 +1,96 @@ += MIN() +:description: The MIN() window function computes the minimum value of an expression across a set of rows defined by a window specification. +:page-topic-type: reference + +The `MIN()` window function computes the minimum value of an expression across a set of rows defined by a window specification. 
+ +== Syntax + +The syntax for this function is: + +[source,sql] +---- +MIN ([ALL] expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression + RANGE BETWEEN start_value AND end_value +) +---- + +== Parameters + +* `ALL`: Retains all duplicate values from the expression. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The following query uses the `MIN()` to find the minimum length of films for each rating category and also calculates a running minimum length of films ordered by their length. 
+ +[source,sql] +---- +SELECT + title, + length, + rating, + MIN(length) OVER ( PARTITION BY rating ) AS MinLengthByRating, + MIN(length) OVER ( +ORDER BY + length ROWS BETWEEN unbounded preceding AND CURRENT ROW ) AS RunningMinLength +FROM film +ORDER BY length; +---- + +The query returns: + +[source,sql] +---- + title | length | rating | minlengthbyrating | runningminlength +---------------------+--------+--------+-------------------+------------------ + HALLOWEEN NUTS | 47 | PG-13 | 47 | 47 + KILL BROTHERHOOD | 54 | G | 54 | 47 + PICKUP DRIVING | 77 | G | 54 | 47 + ATTRACTION NEWTON | 83 | PG-13 | 47 | 47 + PRIVATE DROP | 106 | PG | 106 | 47 + DANGEROUS UPTOWN | 121 | PG | 106 | 47 + HOURS RAGE | 122 | NC-17 | 122 | 47 + SAINTS BRIDE | 125 | G | 54 | 47 + FOREVER CANDIDATE | 131 | NC-17 | 122 | 47 + PIANIST OUTFIELD | 136 | NC-17 | 122 | 47 + SLEEPY JAPANESE | 137 | PG | 106 | 47 + MILLION ACE | 142 | PG-13 | 47 | 47 + CLOCKWORK PARADISE | 143 | PG-13 | 47 | 47 + CHRISTMAS MOONSHINE | 150 | NC-17 | 122 | 47 + INDEPENDENCE HOTEL | 157 | NC-17 | 122 | 47 + WRATH MILE | 176 | NC-17 | 122 | 47 + YOUTH KICK | 179 | NC-17 | 122 | 47 +(17 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/nth-value.adoc b/modules/reference/pages/sql/sql-functions/window-functions/nth-value.adoc new file mode 100644 index 000000000..5080298aa --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/nth-value.adoc @@ -0,0 +1,100 @@ += NTH_VALUE() +:description: The NTH_VALUE() is a window function that accesses the value from the nth row within a specified window frame. +:page-topic-type: reference + +The `NTH_VALUE()` is a window function that accesses the value from the nth row within a specified window frame. 
+ +== Syntax + +The syntax for this function is: + +[source,sql] +---- +NTH_VALUE (value, n) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression + RANGE BETWEEN start_value AND end_value +) +---- + +== Parameters + +* `value`: Column or expression for which you want to retrieve the value. +* `n`: Positive integer (greater than zero) that determines the row number within the window frame from which to retrieve the value. +* `PARTITION BY`: Optional. Divides the result set into partitions, each processed independently. If omitted, the entire result set is treated as a single partition. +* `ORDER BY`: Optional. Specifies the order of rows in each partition to which the function is applied. +* `RANGE BETWEEN`: Optional. Defines a range-based window frame relative to the current row. + +== Examples + +The following example uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); + +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +This query uses the `NTH_VALUE()` function to retrieve the title of the film with the second shortest duration, partitioning results by rating and ordering by length: + +[source,sql] +---- +SELECT + title, + length, + rating, + NTH_VALUE(title, 2) OVER ( + PARTITION BY rating + ORDER BY + length ASC + ) AS second_shortest_film_in_rating +FROM film; +---- + +This query shows the following table: + +[source,bash] +---- +| title |length |rating | second_shortest_film_in_rating | +|---------------------|--------|--------|----------------------------------| +| KILL BROTHERHOOD | 54 | G | NULL | +| PICKUP DRIVING | 77 | G | PICKUP DRIVING | +| SAINTS BRIDE | 125 | G | PICKUP DRIVING | +| HOURS RAGE | 122 | NC-17 | NULL | +| FOREVER CANDIDATE | 131 | NC-17 | FOREVER CANDIDATE | +| PIANIST OUTFIELD | 136 | NC-17 | FOREVER CANDIDATE | +| CHRISTMAS MOONSHINE | 150 | NC-17 | FOREVER CANDIDATE | +| INDEPENDENCE HOTEL | 157 | NC-17 | FOREVER CANDIDATE | +| WRATH MILE | 176 | NC-17 | FOREVER CANDIDATE | +| YOUTH KICK | 179 | NC-17 | FOREVER CANDIDATE | +| PRIVATE DROP | 106 | PG | NULL | +| DANGEROUS UPTOWN | 121 | PG | DANGEROUS UPTOWN | +| SLEEPY JAPANESE | 137 | PG | DANGEROUS UPTOWN | +| HALLOWEEN NUTS | 47 | 
PG-13 | NULL | +| ATTRACTION NEWTON | 83 | PG-13 | ATTRACTION NEWTON | +| MILLION ACE | 142 | PG-13 | ATTRACTION NEWTON | +| CLOCKWORK PARADISE | 143 | PG-13 | ATTRACTION NEWTON | +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/ntile.adoc b/modules/reference/pages/sql/sql-functions/window-functions/ntile.adoc new file mode 100644 index 000000000..adf4a91dd --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/ntile.adoc @@ -0,0 +1,95 @@ += NTILE() +:description: The NTILE() function is a window function used to divide an ordered data set into a specified number of approximately equal groups or buckets. +:page-topic-type: reference + +The `NTILE()` function is a window function used to divide an ordered data set into a specified number of approximately equal groups or buckets. This function assigns each group a bucket number starting from one. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +NTILE(buckets) OVER ( + [PARTITION BY partition_expression, ... ] + [ORDER BY sort_expression [ASC | DESC], ...] +) +---- + +== Parameters + +* `buckets`: Positive integer or an expression that evaluates to a positive integer for each partition. It specifies the number of groups into which the data should be divided. + +== Restrictions + +* `buckets`: Must be a positive integer. Redpanda SQL truncates non-integer constants to an integer. + +== Examples + +The following example uses a simplified version of the `film` table from the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila database^], containing only the `title`, `length` and `rating` columns.
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); + +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The following query uses the `NTILE()` function to divide the films into four quartiles based on their length: + +[source,sql] +---- +SELECT + title, + length, + NTILE(4) OVER (ORDER BY length) AS quartile +FROM film; +---- + +The query returns: + +[source,sql] +---- + title | length | quartile +---------------------+--------+---------- + HALLOWEEN NUTS | 47 | 1 + KILL BROTHERHOOD | 54 | 1 + PICKUP DRIVING | 77 | 1 + ATTRACTION NEWTON | 83 | 1 + PRIVATE DROP | 106 | 1 + DANGEROUS UPTOWN | 121 | 2 + HOURS RAGE | 122 | 2 + SAINTS BRIDE | 125 | 2 + FOREVER CANDIDATE | 131 | 2 + PIANIST OUTFIELD | 136 | 3 + SLEEPY JAPANESE | 137 | 3 + MILLION ACE | 142 | 3 + CLOCKWORK PARADISE | 143 | 3 + CHRISTMAS MOONSHINE | 150 | 4 + INDEPENDENCE HOTEL | 157 | 4 + WRATH MILE | 176 | 4 + YOUTH KICK | 179 | 4 +(17 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/percent-rank.adoc b/modules/reference/pages/sql/sql-functions/window-functions/percent-rank.adoc new file mode 100644 index 000000000..1867d4827 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/percent-rank.adoc @@ -0,0 +1,128 @@ += PERCENT_RANK() +:description: PERCENT_RANK() window function 
determines the relative rank of a value in a group of values, based on the ORDER BY expression in the OVER clause. +:page-topic-type: reference + +`PERCENT_RANK()` window function determines the relative rank of a value in a group of values, based on the `ORDER BY` expression in the `OVER` clause. It can be used with all xref:reference:sql/sql-data-types/index.adoc[data types supported by Redpanda SQL]. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +PERCENT_RANK() OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression +) +---- + +The `PERCENT_RANK()` is calculated as: + +[source,bash] +---- +(r - 1) / (n - 1) +---- + +Where `r` is the rank of the current row and `n` is the total number of rows in the window or partition. + +Rows with equal values for the ranking criteria receive the same relative rank. The output data type for this function is `DOUBLE PRECISION`. The output will indicate the rank of values in a table, regardless of the input types. + +* If the optional `PARTITION BY` expression is present, the rankings are reset for each group of rows +* If the `ORDER BY` expression is omitted then all relative ranks are equal to 0 + +== Parameters + +* `()`: This function does not take any arguments, but the parentheses are required. +* `PARTITION BY`: Optional. Divides the result set into partitions, each processed independently. If omitted, the entire result set is treated as a single partition. +* `ORDER BY`: Order of rows in each partition to which the function is applied. 
+ +== Examples + +The following examples use the `winsales` table that stores details about some sales transactions: + +[source,sql] +---- +CREATE TABLE winsales( + salesid int, + dateid date, + sellerid int, + buyerid text, + qty int, + qty_shipped int); +INSERT INTO winsales VALUES + (30001, '8/2/2003', 3, 'b', 10, 10), + (10001, '12/24/2003', 1, 'c', 10, 10), + (10005, '12/24/2003', 1, 'a', 30, null), + (40001, '1/9/2004', 4, 'a', 40, null), + (10006, '1/18/2004', 1, 'c', 10, null), + (20001, '2/12/2004', 2, 'b', 20, 20), + (40005, '2/12/2004', 4, 'a', 10, 10), + (20002, '2/16/2004', 2, 'c', 20, 20), + (30003, '4/18/2004', 3, 'b', 15, null), + (30004, '4/18/2004', 3, 'b', 20, null), + (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== PERCENT_RANK() with ORDER BY + +This example executes the `PERCENT_RANK()` function with `ORDER BY` keyword and calculates the descending percent rank of all rows based on the quantity sold: + +[source,sql] +---- +SELECT salesid, qty, + PERCENT_RANK() OVER (ORDER BY qty DESC) AS p_rnk, + RANK() OVER (ORDER BY qty DESC) AS rnk +FROM winsales +ORDER BY 2,1; +---- + +The query returns the sales ID along with the quantity sold and both the percent and regular ranks: + +[source,sql] +---- + salesid | qty | p_rnk | rnk +---------+-----+-------+----- + 10001 | 10 | 0.7 | 8 + 10006 | 10 | 0.7 | 8 + 30001 | 10 | 0.7 | 8 + 40005 | 10 | 0.7 | 8 + 30003 | 15 | 0.6 | 7 + 20001 | 20 | 0.3 | 4 + 20002 | 20 | 0.3 | 4 + 30004 | 20 | 0.3 | 4 + 10005 | 30 | 0.1 | 2 + 30007 | 30 | 0.1 | 2 + 40001 | 40 | 0 | 1 +---- + +=== PERCENT_RANK() with ORDER BY and PARTITION BY + +This example executes the `PERCENT_RANK()` function with `ORDER BY` keyword and `PARTITION BY` clause, partitions the table by seller ID, orders each partition by the quantity, and assigns a percent rank to each row: + +[source,sql] +---- +SELECT salesid, sellerid, qty, + PERCENT_RANK() OVER (PARTITION BY sellerid ORDER BY qty DESC) AS p_rnk +FROM winsales +ORDER BY 2,3,1; +---- + +The
query returns: + +[source,sql] +---- + salesid | sellerid | qty | p_rnk +---------+----------+-----+-------------------- + 10001 | 1 | 10 | 0.5 + 10006 | 1 | 10 | 0.5 + 10005 | 1 | 30 | 0 + 20001 | 2 | 20 | 0 + 20002 | 2 | 20 | 0 + 30001 | 3 | 10 | 1 + 30003 | 3 | 15 | 0.6666666666666666 + 30004 | 3 | 20 | 0.3333333333333333 + 30007 | 3 | 30 | 0 + 40005 | 4 | 10 | 1 + 40001 | 4 | 40 | 0 +(11 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/rank.adoc b/modules/reference/pages/sql/sql-functions/window-functions/rank.adoc new file mode 100644 index 000000000..f2c29f7e8 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/rank.adoc @@ -0,0 +1,117 @@ += RANK() +:description: The RANK() window function determines the rank of a value in a group of values, based on the ORDER BY expression in the OVER clause. +:page-topic-type: reference + +The `RANK()` window function determines the rank of a value in a group of values, based on the `ORDER BY` expression in the `OVER` clause. It can be used with all xref:reference:sql/sql-data-types/index.adoc[data types supported by Redpanda SQL]. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +RANK() OVER ( + [PARTITION BY partition_expression, ... ] + ORDER BY sort_expression [ASC | DESC], ... +) +---- + +Rows with equal values for the ranking criteria receive the same rank. The output type for this function is `BIGINT` and it indicates the rank of values in a table, regardless of the input types. + +* If the optional `PARTITION BY` expression is present, the rankings are reset for each group of rows +* If the `ORDER BY` expression is omitted then all ranks are equal to 1 + +== Parameters + +* `()`: This function does not take any arguments, but the parentheses are required. 
+ +== Examples + +The following examples use the `winsales` table that stores details about some sales transactions: + +[source,sql] +---- +CREATE TABLE winsales( + salesid int, + dateid date, + sellerid int, + buyerid text, + qty int, + qty_shipped int); +INSERT INTO winsales VALUES + (30001, '8/2/2003', 3, 'b', 10, 10), + (10001, '12/24/2003', 1, 'c', 10, 10), + (10005, '12/24/2003', 1, 'a', 30, null), + (40001, '1/9/2004', 4, 'a', 40, null), + (10006, '1/18/2004', 1, 'c', 10, null), + (20001, '2/12/2004', 2, 'b', 20, 20), + (40005, '2/12/2004', 4, 'a', 10, 10), + (20002, '2/16/2004', 2, 'c', 20, 20), + (30003, '4/18/2004', 3, 'b', 15, null), + (30004, '4/18/2004', 3, 'b', 20, null), + (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== RANK() with ORDER BY + +This example executes the `RANK()` function with `ORDER BY` keyword and calculates the rank of all rows based on the quantity sold: + +[source,sql] +---- +SELECT salesid, qty, + RANK() OVER (ORDER BY qty) +FROM winsales +ORDER BY 2,1; +---- + +Output that includes the sales ID along with the quantity sold and regular ranks: + +[source,sql] +---- + salesid | qty | rank +---------+-----+------ + 10001 | 10 | 1 + 10006 | 10 | 1 + 30001 | 10 | 1 + 40005 | 10 | 1 + 30003 | 15 | 5 + 20001 | 20 | 6 + 20002 | 20 | 6 + 30004 | 20 | 6 + 10005 | 30 | 9 + 30007 | 30 | 9 + 40001 | 40 | 11 +(11 rows) +---- + +=== RANK() with ORDER BY and PARTITION BY + +This example executes the `RANK()` function with `ORDER BY` keyword and `PARTITION BY` clause, partitions the table by seller ID, orders each partition by the quantity, and assigns a rank to each row: + +[source,sql] +---- +SELECT salesid, sellerid, qty, + RANK() OVER (PARTITION BY sellerid ORDER BY qty) +FROM winsales +ORDER BY 2,3,1; +---- + +The query returns: + +[source,sql] +---- + salesid | sellerid | qty | rank +---------+----------+-----+------ + 10001 | 1 | 10 | 1 + 10006 | 1 | 10 | 1 + 10005 | 1 | 30 | 3 + 20001 | 2 | 20 | 1 + 20002 | 2 | 20 | 1 + 30001 | 3 | 10 | 
1 + 30003 | 3 | 15 | 2 + 30004 | 3 | 20 | 3 + 30007 | 3 | 30 | 4 + 40005 | 4 | 10 | 1 + 40001 | 4 | 40 | 2 +(11 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/row-number.adoc b/modules/reference/pages/sql/sql-functions/window-functions/row-number.adoc new file mode 100644 index 000000000..f0ec6c158 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/row-number.adoc @@ -0,0 +1,146 @@ += ROW_NUMBER() +:description: The ROW_NUMBER() window function returns the number of the current row within its partition (counting from 1), based on the ORDER BY expression in the OVER clause. +:page-topic-type: reference + +The `ROW_NUMBER()` window function returns the number of the current row within its partition (counting from 1), based on the `ORDER BY` expression in the `OVER` clause. It can be used with all xref:reference:sql/sql-data-types/index.adoc[data types] supported by Redpanda SQL. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +ROW_NUMBER() OVER ( + [PARTITION BY partition_expression, ... ] + ORDER BY sort_expression [ASC | DESC], ... +) +---- + +The function returns a value of type `BIGINT`. Rows with equal values for the `ORDER BY` expression receive different row numbers nondeterministically. + +== Parameters + +* `()`: This function does not take any arguments, but the parentheses are required. 
+ +== Examples + +The following examples use the `winsales` table that stores details about some sales transactions: + +[source,sql] +---- +CREATE TABLE winsales( + salesid int, + dateid date, + sellerid int, + buyerid text, + qty int, + qty_shipped int); +INSERT INTO winsales VALUES + (30001, '8/2/2003', 3, 'b', 10, 10), + (10001, '12/24/2003', 1, 'c', 10, 10), + (10005, '12/24/2003', 1, 'a', 30, null), + (40001, '1/9/2004', 4, 'a', 40, null), + (10006, '1/18/2004', 1, 'c', 10, null), + (20001, '2/12/2004', 2, 'b', 20, 20), + (40005, '2/12/2004', 4, 'a', 10, 10), + (20002, '2/16/2004', 2, 'c', 20, 20), + (30003, '4/18/2004', 3, 'b', 15, null), + (30004, '4/18/2004', 3, 'b', 20, null), + (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== ROW_NUMBER() with ORDER BY + +This example executes the `ROW_NUMBER()` function with `ORDER BY` keyword, assigns a row number to each row, and orders the table by the row number (the results are sorted after the window function results are applied): + +[source,sql] +---- +SELECT salesid, qty, + ROW_NUMBER() OVER (ORDER BY salesid) +FROM winsales +ORDER BY 3; +---- + +The query returns: + +[source,sql] +---- + salesid | qty | row_number +---------+-----+------------ + 10001 | 10 | 1 + 10005 | 30 | 2 + 10006 | 10 | 3 + 20001 | 20 | 4 + 20002 | 20 | 5 + 30001 | 10 | 6 + 30003 | 15 | 7 + 30004 | 20 | 8 + 30007 | 30 | 9 + 40001 | 40 | 10 + 40005 | 10 | 11 +(11 rows) +---- + +=== ROW_NUMBER() with ORDER BY and PARTITION BY + +This example executes the `ROW_NUMBER()` function with `ORDER BY` keyword and `PARTITION BY` clause, partitions the table by seller ID, assigns a row number to each row within its partition, and orders the result by the sales ID (the results are sorted after the window function results are applied): + +[source,sql] +---- +SELECT salesid, sellerid, qty, + ROW_NUMBER() OVER (PARTITION BY sellerid ORDER BY salesid) +FROM winsales +ORDER BY 1; +---- + +The query returns: + +[source,sql] +---- + salesid | sellerid | qty | 
row_number +---------+----------+-----+------------ + 10001 | 1 | 10 | 1 + 10005 | 1 | 30 | 2 + 10006 | 1 | 10 | 3 + 20001 | 2 | 20 | 1 + 20002 | 2 | 20 | 2 + 30001 | 3 | 10 | 1 + 30003 | 3 | 15 | 2 + 30004 | 3 | 20 | 3 + 30007 | 3 | 30 | 4 + 40001 | 4 | 40 | 1 + 40005 | 4 | 10 | 2 +(11 rows) +---- + +=== Time series: assigning sequential row numbers by date + +This example assigns a sequential row number to each sale ordered by `dateid`: + +[source,sql] +---- +SELECT dateid, salesid, qty, + ROW_NUMBER() OVER (ORDER BY dateid, salesid) AS time_series_position +FROM winsales +ORDER BY dateid, salesid; +---- + +The query returns: + +[source,sql] +---- + dateid | salesid | qty | time_series_position +------------+---------+-----+---------------------- + 2003-08-02 | 30001 | 10 | 1 + 2003-12-24 | 10001 | 10 | 2 + 2003-12-24 | 10005 | 30 | 3 + 2004-01-09 | 40001 | 40 | 4 + 2004-01-18 | 10006 | 10 | 5 + 2004-02-12 | 20001 | 20 | 6 + 2004-02-12 | 40005 | 10 | 7 + 2004-02-16 | 20002 | 20 | 8 + 2004-04-18 | 30003 | 15 | 9 + 2004-04-18 | 30004 | 20 | 10 + 2004-09-07 | 30007 | 30 | 11 +(11 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/sum.adoc b/modules/reference/pages/sql/sql-functions/window-functions/sum.adoc new file mode 100644 index 000000000..2d1f82138 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/sum.adoc @@ -0,0 +1,191 @@ += SUM() +:description: The SUM() window function returns the sum of the input column or expression values. +:page-topic-type: reference + +The `SUM()` window function returns the sum of the input column or expression values. It can be used with a `RANGE` clause, that defines a logical frame of rows based on the values of the current row, rather than a fixed number of rows. 
+ +== Syntax + +The syntax for this function is: + +[source,sql] +---- +SUM(expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression + [ROWS | RANGE BETWEEN start_value AND end_value] +) +---- + +The expression's argument types supported by the `SUM` window function are `INTEGER`, `BIGINT`, `REAL` and `DOUBLE PRECISION`. The return types of the `SUM` function are: `BIGINT` for integer and `DOUBLE PRECISION` for floating-point arguments. + +[NOTE] +==== +The `SUM()` window function works with numeric values and ignores `NULL` values. +==== + +== Parameters + +* `expression`: Column or expression to sum. +* `PARTITION BY`: Optional. Divides the result set into partitions, each processed independently. If omitted, the entire result set is treated as a single partition. +* `ROWS | RANGE BETWEEN`: Row-based (`ROWS`) or range-based (`RANGE`) window frame relative to the current row. + +== Examples + +The following examples use the `winsales` table that stores details of some sales transactions: + +[source,sql] +---- +CREATE TABLE winsales( + salesid int, + dateid date, + sellerid int, + buyerid text, + qty int, + qty_shipped int); +INSERT INTO winsales VALUES + (30001, '8/2/2003', 3, 'b', 10, 10), + (10001, '12/24/2003', 1, 'c', 10, 10), + (10005, '12/24/2003', 1, 'a', 30, null), + (40001, '1/9/2004', 4, 'a', 40, null), + (10006, '1/18/2004', 1, 'c', 10, null), + (20001, '2/12/2004', 2, 'b', 20, 20), + (40005, '2/12/2004', 4, 'a', 10, 10), + (20002, '2/16/2004', 2, 'c', 20, 20), + (30003, '4/18/2004', 3, 'b', 15, null), + (30004, '4/18/2004', 3, 'b', 20, null), + (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== SUM() with ORDER BY + +This example executes the `SUM()` window function with `ORDER BY` keyword: + +[source,sql] +---- +SELECT salesid, dateid, sellerid, qty, + SUM(qty) OVER (ORDER BY dateid, salesid ROWS UNBOUNDED PRECEDING) +FROM winsales +ORDER BY 2,1; +---- + +The output from this query includes the sales ID, date ID, seller ID, quantity and quantity sum: 
+[source,sql] +---- + salesid | dateid | sellerid | qty | sum +---------+------------+----------+-----+----- + 30001 | 2003-08-02 | 3 | 10 | 10 + 10001 | 2003-12-24 | 1 | 10 | 20 + 10005 | 2003-12-24 | 1 | 30 | 50 + 40001 | 2004-01-09 | 4 | 40 | 90 + 10006 | 2004-01-18 | 1 | 10 | 100 + 20001 | 2004-02-12 | 2 | 20 | 120 + 40005 | 2004-02-12 | 4 | 10 | 130 + 20002 | 2004-02-16 | 2 | 20 | 150 + 30003 | 2004-04-18 | 3 | 15 | 165 + 30004 | 2004-04-18 | 3 | 20 | 185 + 30007 | 2004-09-07 | 3 | 30 | 215 +(11 rows) +---- + +=== SUM() with ORDER BY and ROWS frame + +This example calculates the running total of `qty` ordered by `dateid` and `salesid` using a `ROWS UNBOUNDED PRECEDING` frame, which sums all rows from the start up to the current row: + +[source,sql] +---- +SELECT salesid, dateid, qty, + SUM(qty) OVER (ORDER BY dateid, salesid ROWS UNBOUNDED PRECEDING) AS running_qty_sum +FROM winsales +ORDER BY dateid, salesid; +---- + +The query returns: + +[source,sql] +---- + salesid | dateid | qty | running_qty_sum +---------+------------+-----+----------------- + 30001 | 2003-08-02 | 10 | 10 + 10001 | 2003-12-24 | 10 | 20 + 10005 | 2003-12-24 | 30 | 50 + 40001 | 2004-01-09 | 40 | 90 + 10006 | 2004-01-18 | 10 | 100 + 20001 | 2004-02-12 | 20 | 120 + 40005 | 2004-02-12 | 10 | 130 + 20002 | 2004-02-16 | 20 | 150 + 30003 | 2004-04-18 | 15 | 165 + 30004 | 2004-04-18 | 20 | 185 + 30007 | 2004-09-07 | 30 | 215 +(11 rows) +---- + +The `running_qty_sum` column shows the cumulative sum of `qty` ordered by `dateid` and `salesid`. For each row, it sums all `qty` values from the first row up to the current row in that order. 
+ +=== SUM() with ORDER BY and PARTITION BY + +This example executes the `SUM()` function with `ORDER BY` keyword and `PARTITION BY` clause: + +[source,sql] +---- +SELECT salesid, dateid, sellerid, qty, + SUM(qty) OVER (PARTITION BY sellerid ORDER BY dateid, salesid ROWS UNBOUNDED PRECEDING) +FROM winsales +ORDER BY 3,2,1; +---- + +The query returns: + +[source,sql] +---- + salesid | dateid | sellerid | qty | sum +---------+------------+----------+-----+----- + 10001 | 2003-12-24 | 1 | 10 | 10 + 10005 | 2003-12-24 | 1 | 30 | 40 + 10006 | 2004-01-18 | 1 | 10 | 50 + 20001 | 2004-02-12 | 2 | 20 | 20 + 20002 | 2004-02-16 | 2 | 20 | 40 + 30001 | 2003-08-02 | 3 | 10 | 10 + 30003 | 2004-04-18 | 3 | 15 | 25 + 30004 | 2004-04-18 | 3 | 20 | 45 + 30007 | 2004-09-07 | 3 | 30 | 75 + 40001 | 2004-01-09 | 4 | 40 | 40 + 40005 | 2004-02-12 | 4 | 10 | 50 +(11 rows) +---- + +=== Time series: SUM() with RANGE BETWEEN for last 30 days + +This example demonstrates a common time series use case: calculating the rolling sum of sales quantity over the last 30 days for each row, using the `RANGE BETWEEN INTERVAL '30 days' PRECEDING AND CURRENT ROW` frame: + +[source,sql] +---- +SELECT salesid, dateid, qty, + SUM(qty) OVER ( + ORDER BY dateid + RANGE BETWEEN INTERVAL '30 days' PRECEDING AND CURRENT ROW + ) AS rolling_30d_qty_sum +FROM winsales +ORDER BY dateid; +---- + +The output from this query sums the `qty` of all sales within the 30-day window ending at the current row's `dateid`: + +[source,sql] +---- + salesid | dateid | qty | rolling_30d_qty_sum +---------+------------+-----+--------------------- + 30001 | 2003-08-02 | 10 | 10 + 10001 | 2003-12-24 | 10 | 40 + 10005 | 2003-12-24 | 30 | 40 + 40001 | 2004-01-09 | 40 | 80 + 10006 | 2004-01-18 | 10 | 90 + 20001 | 2004-02-12 | 20 | 40 + 40005 | 2004-02-12 | 10 | 40 + 20002 | 2004-02-16 | 20 | 60 + 30003 | 2004-04-18 | 15 | 35 + 30004 | 2004-04-18 | 20 | 35 + 30007 | 2004-09-07 | 30 | 30 +(11 rows) +---- diff --git 
a/modules/reference/pages/sql/sql-operators/bitwise-shift-left.adoc b/modules/reference/pages/sql/sql-operators/bitwise-shift-left.adoc new file mode 100644 index 000000000..5557db819 --- /dev/null +++ b/modules/reference/pages/sql/sql-operators/bitwise-shift-left.adoc @@ -0,0 +1,97 @@ += BITWISE SHIFT LEFT +:description: Bitwise shift operators in Redpanda SQL manipulate the bits of integer value by shifting them left or right. +:page-topic-type: reference + +Bitwise shift operators in Redpanda SQL manipulate the bits of integer value by shifting them left or right. These operations are fundamental in low-level data processing and optimization. + +The bitwise *left shift (`<<`)* operator shifts the bits of an integer to the left by the specified shift amount. For *integers*, this operation is equivalent to multiplying the integer value by 2 raised to the power of the shift amount. During this operation, high-order bits that are shifted out are permanently lost without the ability to be preserved, while zeros are shifted in from the right to fill the vacant positions. Because the left shifts operation (<<) on signed integers is *arithmetic*, meaning it shifts all bits to the left and fills the vacant rightmost bits with zeros on the right, the behavior is the same as a logical shift in this case. However, the overall length of the bit string is preserved, with zeros padding on the right to maintain the length. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +value << shift_amount +---- + +== Parameters + +* `value`: Integer expression. +* `shift_amount`: A *non-negative* integer specifying how many bit positions to shift. + +== Restrictions + +Bitwise shift operators in Redpanda SQL require the shift amount to be a *non-negative* integer. 
However, negative shift counts are not rejected: Redpanda SQL applies modulo arithmetic based on the bit width, so `1 << -3` on a 32-bit integer is equivalent to `1 << 29`, producing predictable results without errors or undefined behavior. + +When performing bitwise left shift operations (`<<`) on 32-bit integer values in Redpanda SQL, the shift count is taken *modulo* 32. This means that shifting by a number of bits greater than or equal to 32 wraps around. For example, `1 << 35` is equivalent to `1 << 3` because `35 % 32 = 3`. + +[WARNING] +==== +If you shift by a value larger than or equal to 32, the actual shift is the remainder after dividing by 32, which may lead to unexpected results if not carefully considered. +==== + +== Examples + +This section uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `rating` and `privileges` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title TEXT NOT NULL, + rating TEXT, + privileges INT NOT NULL +); +INSERT INTO film(title, rating, privileges) VALUES + ('ATTRACTION NEWTON', 'PG-13', 1), -- Free users + ('CHRISTMAS MOONSHINE', 'NC-17', 2), -- Premium users + ('DANGEROUS UPTOWN', 'PG', 3), -- Free + Premium users (bits 0 and 1) + ('KILL BROTHERHOOD', 'G', 4), -- Admin-only content + ('HALLOWEEN NUTS', 'PG-13', 1), + ('HOURS RAGE', 'NC-17', 2), + ('PIANIST OUTFIELD', 'NC-17', 3), + ('PICKUP DRIVING', 'G', 4), + ('INDEPENDENCE HOTEL', 'NC-17', 1), + ('PRIVATE DROP', 'PG', 2), + ('SAINTS BRIDE', 'G', 3), + ('FOREVER CANDIDATE', 'NC-17', 4), + ('MILLION ACE', 'PG-13', 1), + ('SLEEPY JAPANESE', 'PG', 2), + ('WRATH MILE', 'NC-17', 3), + ('YOUTH KICK', 'NC-17', 4), + ('CLOCKWORK PARADISE', 'PG-13', 1); +---- + +[NOTE] +==== +* Privilege 1 (binary 0001): Free users can watch. +* Privilege 2 (binary 0010): Premium users can watch. +* Privilege 3 (binary 0011): Both free and premium users can watch. +* Privilege 4 (binary 0100): Admin-only content. 
+==== +The following query uses the integer `Left shift (<<)` operation, shifting the privileges value left by 1 for the movie '`ATTRACTION NEWTON`': + +[source,sql] +---- +UPDATE film +SET privileges = privileges << 1 +WHERE title = 'ATTRACTION NEWTON'; +---- + +After running the update, you can verify the change with: + +[source,sql] +---- +SELECT title, privileges FROM film WHERE title = 'ATTRACTION NEWTON'; +---- + +Expected output: + +[source,sql] +---- + title | privileges +-------------------+------------ + ATTRACTION NEWTON | 2 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-operators/bitwise-shift-right.adoc b/modules/reference/pages/sql/sql-operators/bitwise-shift-right.adoc new file mode 100644 index 000000000..e3fc67643 --- /dev/null +++ b/modules/reference/pages/sql/sql-operators/bitwise-shift-right.adoc @@ -0,0 +1,98 @@ += BITWISE SHIFT RIGHT +:description: Bitwise shift operators in Redpanda SQL manipulate the bits of integer value by shifting them left or right. +:page-topic-type: reference + +Bitwise shift operators in Redpanda SQL manipulate the bits of integer value by shifting them left or right. These operations are fundamental in low-level data processing and optimization. + +The bitwise *right shift (`>>`)* operator shifts the bits of an integer to the right by the specified number of positions. For *integers*, this operation is equivalent to dividing the integer value by 2 raised to the power of the shift amount, discarding any remainder. Unlike a logical shift, the right shift in Redpanda SQL is an *arithmetic* shift, meaning that the vacant leftmost bits are filled with the original sign bits (the most significant bit) rather than zeros. This preserves the sign of the integer after the shift, ensuring correct behavior for signed values. During the shift, low-order bits that move beyond the size limit are permanently lost. 
The overall length of the bit string is preserved, with copies of the sign bit padding on the left side to maintain the length. + +== Syntax + +The syntax for this function is: + +[source,sql] +---- +value >> shift_amount +---- + +== Parameters + +* `value`: Integer expression. +* `shift_amount`: A *non-negative* integer specifying how many bit positions to shift. + +== Restrictions + +Bitwise shift operators in Redpanda SQL expect the shift amount to be a *non-negative* integer. Negative shift counts are not rejected, however: Redpanda SQL applies modulo arithmetic based on the bit width, so `1 >> -3` on a 32-bit integer is equivalent to `1 >> 29`, producing predictable results without errors or undefined behavior. + +When performing bitwise right shift operations (`>>`) on 32-bit integer values in Redpanda SQL, the shift count is taken *modulo* 32, just as with left shifts. This means that shifting by a number of bits greater than or equal to 32 wraps around. For example, `1 >> 35` is equivalent to `1 >> 3` because `35 % 32 = 3`. + +[WARNING] +==== +If you shift by a value larger than or equal to 32, the actual shift is the remainder after dividing by 32, +which may lead to unexpected results if not carefully considered. +==== + +== Examples + +This section uses a simplified version of the `film` table from the Pagila database, containing only the `title`, `rating` and `privileges` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title TEXT NOT NULL, + rating TEXT, + privileges INT NOT NULL +); +INSERT INTO film(title, rating, privileges) VALUES + ('ATTRACTION NEWTON', 'PG-13', 1), -- Free users + ('CHRISTMAS MOONSHINE', 'NC-17', 2), -- Premium users + ('DANGEROUS UPTOWN', 'PG', 3), -- Free + Premium users (bits 0 and 1) + ('KILL BROTHERHOOD', 'G', 4), -- Admin-only content + ('HALLOWEEN NUTS', 'PG-13', 1), + ('HOURS RAGE', 'NC-17', 2), + ('PIANIST OUTFIELD', 'NC-17', 3), + ('PICKUP DRIVING', 'G', 4), + ('INDEPENDENCE HOTEL', 'NC-17', 1), + ('PRIVATE DROP', 'PG', 2), + ('SAINTS BRIDE', 'G', 3), + ('FOREVER CANDIDATE', 'NC-17', 4), + ('MILLION ACE', 'PG-13', 1), + ('SLEEPY JAPANESE', 'PG', 2), + ('WRATH MILE', 'NC-17', 3), + ('YOUTH KICK', 'NC-17', 4), + ('CLOCKWORK PARADISE', 'PG-13', 1); +---- + +[NOTE] +==== +* Privilege 1 (binary 0001): Free users can watch. +* Privilege 2 (binary 0010): Premium users can watch. +* Privilege 3 (binary 0011): Both free and premium users can watch. +* Privilege 4 (binary 0100): Admin-only content. 
+==== +The following query uses the integer `right shift (>>)` operation, shifting the privileges value right by 1 for the movie '`DANGEROUS UPTOWN`': + +[source,sql] +---- +UPDATE film +SET privileges = privileges >> 1 +WHERE title = 'DANGEROUS UPTOWN'; +---- + +After running the update, you can verify the change with: + +[source,sql] +---- +SELECT title, privileges FROM film WHERE title = 'DANGEROUS UPTOWN'; +---- + +Expected output: + +[source,sql] +---- + title | privileges +-------------------+------------ + DANGEROUS UPTOWN | 1 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-operators/index.adoc b/modules/reference/pages/sql/sql-operators/index.adoc new file mode 100644 index 000000000..72bb32bd9 --- /dev/null +++ b/modules/reference/pages/sql/sql-operators/index.adoc @@ -0,0 +1,113 @@ += Operators +:description: Operators in Redpanda SQL are special symbols used in expressions to compare, combine, or manipulate values. +:page-topic-type: reference + +Operators in Redpanda SQL are special symbols used in expressions to compare, combine, or manipulate values. + +== Comparison operators + +[cols="1,1,3,2",options="header"] +|=== +|Operator |Name |Description |Example +|`=` |Equal to |The value of one item is equal to another item's value. |`cust_name = 'Mike'` +|`<>` or `!=` |Not equal to |The value of one item is not equal to the other item's value. |`subj_score != 'FAILED'` +|`>` |Greater than |The value of one item is greater than another item's value. |`stock_value > 10` +|`<` |Less than |The value of one item is less than another item's value. |`stock_value < 20` +|`>=` |Greater than or equal to |The value of one item is greater than or equal to the other item's value. |`prod_price >= 3000` +|`+<=+` |Less than or equal to |The value of one item is less than or equal to the other item's value. 
|`prod_price +<=+ 9000` +|=== + +== Logical operators + +[cols="1,1,3,2",options="header"] +|=== +|Operator |Name |Description |Example +|`AND` |Logical AND |Returns true if both conditions are true. |`a > 1 AND b < 10` +|`OR` |Logical OR |Returns true if at least one condition is true. |`a = 1 OR a = 2` +|`NOT` |Logical NOT |Reverses the result of a condition. |`NOT true = false` +|=== + +== Null and boolean test operators + +[cols="1,3,2",options="header"] +|=== +|Operator |Description |Example +|`IS NULL` |Returns true if the value is NULL. |`WHERE salary IS NULL` +|`IS NOT NULL` |Returns true if the value is not NULL. |`WHERE salary IS NOT NULL` +|`IS DISTINCT FROM` |Returns true if the two values are not equal, treating NULL as a comparable value. |`a IS DISTINCT FROM b` +|`IS NOT DISTINCT FROM` |Returns true if the two values are equal, treating NULL as a comparable value. |`a IS NOT DISTINCT FROM b` +|`IS TRUE` |Returns true if the boolean value is true. |`WHERE active IS TRUE` +|`IS NOT TRUE` |Returns true if the boolean value is not true (false or NULL). |`WHERE active IS NOT TRUE` +|`IS FALSE` |Returns true if the boolean value is false. |`WHERE active IS FALSE` +|`IS NOT FALSE` |Returns true if the boolean value is not false (true or NULL). |`WHERE active IS NOT FALSE` +|=== + +== Arithmetic operators + +[cols="1,1,3,2",options="header"] +|=== +|Operator |Name |Description |Example +|`+` |Addition |Adds two values, or acts as unary positive. |`salary + bonus` +|`-` |Subtraction |Subtracts one value from another, or negates a value. |`price - discount` +|`*` |Multiplication |Multiplies two values. |`quantity * price` +|`/` |Division |Divides one value by another. |`total / count` +|`%` |Modulus |Returns the remainder of a division. |`10 % 3` +|=== + +== Mathematical operators + +[cols="1,1,3,2",options="header"] +|=== +|Operator |Name |Description |Example +|`@` |Absolute value |Returns the absolute value of a number. 
|`@ -5` +|`^` |Exponentiation |Raises a number to a power. |`2 ^ 3` +|`\|/` |Square root |Returns the square root of a number. |`\|/ 25` +|`\|\|/` |Cube root |Returns the cube root of a number. |`\|\|/ 27` +|=== + +== Bitwise operators + +[cols="1,1,3,2",options="header"] +|=== +|Operator |Name |Description |Example +|`&` |Bitwise AND |Performs a bitwise AND on two integers. |`5 & 3` +|`\|` |Bitwise OR |Performs a bitwise OR on two integers. |`5 \| 3` +|`#` |Bitwise XOR |Performs a bitwise exclusive OR on two integers. |`5 # 3` +|`~` |Bitwise NOT |Performs a bitwise NOT (complement) on an integer. |`~ 5` +|xref:reference:sql/sql-operators/bitwise-shift-left.adoc[`<<`] |Bitwise shift left |Shifts the bits of an integer to the left. |`1 << 4` +|xref:reference:sql/sql-operators/bitwise-shift-right.adoc[`>>`] |Bitwise shift right |Shifts the bits of an integer to the right. |`16 >> 2` +|=== + +== String and pattern matching operators + +[cols="1,1,3,2",options="header"] +|=== +|Operator |Name |Description |Example +|`\|\|` |Concatenation |Concatenates two strings, or appends elements to an array. |`'foo' \|\| 'bar'` +|`~~` |LIKE |Returns true if the string matches the pattern. |`name ~~ 'J%'` +|`!~~` |NOT LIKE |Returns true if the string does not match the pattern. |`name !~~ 'J%'` +|`~~*` |ILIKE |Case-insensitive LIKE pattern matching. |`name ~~* 'j%'` +|`!~~*` |NOT ILIKE |Case-insensitive NOT LIKE pattern matching. |`name !~~* 'j%'` +|`~` |Regex match |Returns true if the string matches the regular expression. |`name ~ '^J'` +|`~*` |Regex match (case-insensitive) |Case-insensitive regular expression match. |`name ~* '^j'` +|`!~` |Regex not match |Returns true if the string does not match the regular expression. |`name !~ '^J'` +|`!~*` |Regex not match (case-insensitive) |Case-insensitive regular expression non-match. 
|`name !~* '^j'` +|=== + +== JSON operators + +[cols="1,1,3,2",options="header"] +|=== +|Operator |Name |Description |Example +|`->` |JSON extract |Extracts a JSON object field or array element, returning JSON. |`data -> 'key'` +|`->>` |JSON extract text |Extracts a JSON object field or array element as text. |`data ->> 'key'` +|=== + +== Array operators + +[cols="1,1,3,2",options="header"] +|=== +|Operator |Name |Description |Example +|`= ANY` |Any element equals |Returns true if any element in the array matches the value. |`3 = ANY(ARRAY[1,2,3])` +|`= ALL` |All elements equal |Returns true if all elements in the array match the value. |`1 = ALL(ARRAY[1,1,1])` +|=== diff --git a/modules/reference/pages/sql/sql-statements/alter-redpanda-catalog.adoc b/modules/reference/pages/sql/sql-statements/alter-redpanda-catalog.adoc new file mode 100644 index 000000000..10645e4cd --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/alter-redpanda-catalog.adoc @@ -0,0 +1,27 @@ += ALTER REDPANDA CATALOG +:description: The ALTER REDPANDA CATALOG statement modifies connection properties of an existing Redpanda catalog. +:page-topic-type: reference + +The `ALTER REDPANDA CATALOG` statement modifies connection properties of an existing Redpanda catalog. + +== Syntax + +[source,sql] +---- +ALTER REDPANDA CATALOG [IF EXISTS] catalog_name +WITH (option = 'value' [, ...]); +---- + +* `catalog_name`: Name of the catalog to modify. +* `IF EXISTS`: Optional. Prevents an error if the catalog does not exist. +* `option = 'value'`: One or more connection options to update. See xref:reference:sql/sql-statements/create-redpanda-catalog.adoc[CREATE REDPANDA CATALOG] for the full list of options. 
+ +== Examples + +Update the broker address for an existing catalog: + +[source,sql] +---- +ALTER REDPANDA CATALOG my_catalog +WITH (initial_brokers = 'new-broker:9092'); +---- diff --git a/modules/reference/pages/sql/sql-statements/alter-storage.adoc b/modules/reference/pages/sql/sql-statements/alter-storage.adoc new file mode 100644 index 000000000..108464989 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/alter-storage.adoc @@ -0,0 +1,27 @@ += ALTER STORAGE +:description: The ALTER STORAGE statement modifies credentials or configuration of an existing storage connection. +:page-topic-type: reference + +The `ALTER STORAGE` statement modifies the credentials or configuration of an existing storage connection. + +== Syntax + +[source,sql] +---- +ALTER STORAGE [IF EXISTS] storage_name +WITH (option = 'value' [, ...]); +---- + +* `storage_name`: Name of the storage connection to modify. +* `IF EXISTS`: Optional. Prevents an error if the storage connection does not exist. +* `option = 'value'`: One or more options to update. See xref:reference:sql/sql-statements/create-storage.adoc[CREATE STORAGE] for the full list of options. + +== Examples + +Update the region for an existing storage connection: + +[source,sql] +---- +ALTER STORAGE my_s3_storage +WITH (region = 'eu-west-1'); +---- diff --git a/modules/reference/pages/sql/sql-statements/alter-table.adoc b/modules/reference/pages/sql/sql-statements/alter-table.adoc new file mode 100644 index 000000000..49cf42215 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/alter-table.adoc @@ -0,0 +1,28 @@ += ALTER TABLE +:description: The ALTER TABLE statement modifies options of a catalog table mapped to a Redpanda topic. +:page-topic-type: reference + +The `ALTER TABLE` statement modifies the options of a catalog table mapped to a Redpanda topic. 
+ +== Syntax + +[source,sql] +---- +ALTER TABLE [IF EXISTS] catalog_name=>table_name +WITH (option = 'value' [, ...]); +---- + +* `catalog_name`: Name of the Redpanda catalog containing the table. +* `table_name`: Name of the table to modify. +* `IF EXISTS`: Optional. Prevents an error if the table does not exist. +* `option = 'value'`: One or more table options to update. See xref:reference:sql/sql-statements/create-table.adoc[CREATE TABLE] for the full list of options. + +== Examples + +Update the error handling policy for an existing catalog table: + +[source,sql] +---- +ALTER TABLE my_catalog=>sensor_readings +WITH (error_handling_policy = 'FILL_NULL'); +---- diff --git a/modules/reference/pages/sql/sql-statements/copy-to.adoc b/modules/reference/pages/sql/sql-statements/copy-to.adoc new file mode 100644 index 000000000..36ba8c99e --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/copy-to.adoc @@ -0,0 +1,202 @@ += COPY TO +:description: The COPY TO statement exports tables, specific columns, or query results to CSV files or to a client. +:page-topic-type: reference + +The `COPY TO` statement exports tables, specific columns, or query results to a CSV file or to the standard output. + +[WARNING] +==== +By default, the `COPY TO` command overwrites the destination file if it already exists. Ensure that the directory where you save the file has the necessary write permissions. +==== + +== Syntax + +[source,sql] +---- +COPY { table_name [ ( column_name [, ...] ) ] | ( query ) } +TO { 'file_path' | STDOUT } +[ WITH ( option [, ...] ) ]; +---- + +* `table_name`: Table containing the data to export. +* `column_name`: Optional. Specific columns to export. If omitted, all columns are exported. +* `query`: A `SELECT` statement to export specific results. +* `file_path`: Path to the output file. Use `STDOUT` to send the data to the standard output stream. +* `option`: One or more options. See <<options>>. + +[#options] +== Options + +* `FORMAT`: Output format. 
Only `CSV` is supported. +* `DELIMITER`: Single-character string used to separate fields. Default is `,`. +* `HEADER`: Whether to include a header row with column names. Accepts `ON`, `TRUE`, `1`, `OFF`, `FALSE`, or `0`. Default is `OFF`. +* `NULL`: String used to represent `NULL` values. Default is the empty string. +* Cloud storage credentials: Use `AWS_CRED`, `AZURE_CRED`, or `GCS_CRED` to authenticate when exporting to cloud storage. See <<cloud-storage>>. + +== Examples + +The following examples use a table mapped to a Redpanda topic through a catalog. For information on setting up catalogs and tables, see xref:reference:sql/sql-statements/create-table.adoc[CREATE TABLE]. + +=== Export all columns from a table + +Copy all columns in a table to a CSV file: + +[source,sql] +---- +COPY my_catalog=>employee_salary TO '/path/to/exportsalary.csv'; +---- + +=== Export specific columns from a table + +Specify the column names to export only those columns: + +[source,sql] +---- +COPY my_catalog=>employee_salary (empid, empname, empsalary) TO '/path/to/exportsalary.csv'; +---- + +=== Export results of a SELECT statement + +Export only the rows that match a `WHERE` condition: + +[source,sql] +---- +COPY (SELECT * FROM my_catalog=>employee_salary WHERE empdept = 'Marketing') TO '/path/to/exportsalary.csv'; +---- + +=== Export with a custom delimiter + +Specify the delimiter using the `DELIMITER` option. Common delimiters include comma (`,`), semicolon (`;`), pipe (`|`), and dash (`-`). 
+ +[source,sql] +---- +COPY my_catalog=>customer TO '/path/to/customerexport.csv' WITH (DELIMITER ';', HEADER ON); +---- + +The exported CSV uses the specified delimiter: + +[source,text] +---- +cust_id;cust_name +11001;Maya +11003;Ricky +11009;Sean +---- + +=== Export with column headers + +Include column names as a header row using `HEADER ON` (or `HEADER TRUE`, or `HEADER 1`): + +[source,sql] +---- +COPY my_catalog=>personal_details TO '/path/to/personalinfo.csv' WITH (HEADER ON); +---- + +The exported file includes a header row: + +[source,text] +---- +id,first_name,last_name,gender +1,'Mark','Wheeler','M' +2,'Tom','Hanks','M' +3,'Jane','Hopper','F' +---- + +To omit the header (the default), use `HEADER OFF` (or `HEADER FALSE`, or `HEADER 0`). + +=== Export with a NULL replacement string + +Specify a string to replace `NULL` values in the exported file: + +[source,sql] +---- +COPY my_catalog=>example_table TO '/path/to/exampleexport.csv' WITH (NULL 'unknown'); +---- + +In the exported file, `NULL` values are represented as `unknown`. + +=== Export to standard output + +Send the data directly to the client instead of writing to a server-side file: + +[source,sql] +---- +COPY my_catalog=>book_inventory TO STDOUT; +---- + +The query returns: + +[source,text] +---- +"To Kill a Mockingbird",5 +1984,8 +"The Great Gatsby",3 +"Moby Dick",2 +"War and Peace",4 +---- + +[NOTE] +==== +* Only CSV format is supported with `STDOUT`. +* The default delimiter for CSV format is a comma (`,`). +==== + +[#cloud-storage] +=== Export to cloud storage + +To export data to cloud storage, use the `COPY TO` command with the appropriate credentials option for your provider. + +==== AWS S3 + +* `AWS_REGION`: AWS region associated with the storage service. +* `AWS_KEY_ID`: Key identifier for authentication. +* `AWS_PRIVATE_KEY`: Access key for authentication. +* `ENDPOINT`: URL endpoint for the storage service. 
+ +[source,sql] +---- +COPY my_catalog=>film TO 's3://your-bucket/file_name' +WITH (AWS_CRED(AWS_REGION 'us-west-1', AWS_KEY_ID 'key_id', AWS_PRIVATE_KEY 'access_key', ENDPOINT 's3.us-west-1.amazonaws.com'), + FORMAT CSV, HEADER ON, NULL 'unknown'); +---- + +==== Google Cloud Storage + +Pass the path to your credentials JSON file: + +[source,sql] +---- +COPY my_catalog=>film TO 'gs://your-bucket/file_name' WITH (GCS_CRED('/path/to/credentials.json')); +---- + +If you cannot use a path to the credentials file, pass its contents as a string: + +[source,sql] +---- +COPY my_catalog=>project TO 'gs://your-bucket/project_file' WITH (GCS_CRED('<contents of the credentials JSON file>')); +---- + +You can also use `AWS_CRED` with GCS by setting the endpoint: + +[source,sql] +---- +COPY my_catalog=>project TO 'gs://your-bucket/project_file' +WITH (AWS_CRED(AWS_REGION 'region1', AWS_KEY_ID 'key_id', AWS_PRIVATE_KEY 'access_key', ENDPOINT 'https://storage.googleapis.com')); +---- + +[TIP] +==== +For Google Cloud Storage, use HMAC keys for authentication. See the link:https://cloud.google.com/storage/docs/authentication/hmackeys[HMAC keys - Cloud Storage^] page for details. +==== + +==== Azure Blob Storage + +* `TENANT_ID`: Tenant identifier representing your organization's identity in Azure. +* `CLIENT_ID`: Client identifier used for authentication. +* `CLIENT_SECRET`: Secret identifier used as a password for authentication. 
+ +[source,sql] +---- +COPY my_catalog=>taxi_data TO 'wasbs://container-name/your_blob' +WITH (AZURE_CRED(TENANT_ID 'your_tenant_id', CLIENT_ID 'your_client_id', CLIENT_SECRET 'your_client_secret')); +---- diff --git a/modules/reference/pages/sql/sql-statements/create-redpanda-catalog.adoc b/modules/reference/pages/sql/sql-statements/create-redpanda-catalog.adoc new file mode 100644 index 000000000..605e16821 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/create-redpanda-catalog.adoc @@ -0,0 +1,93 @@ += CREATE REDPANDA CATALOG +:description: The CREATE REDPANDA CATALOG statement creates a named connection to a Redpanda cluster, enabling you to map topics as SQL tables. +:page-topic-type: reference + +The `CREATE REDPANDA CATALOG` statement creates a named connection to a Redpanda cluster. After creating a catalog, you can map Redpanda topics as queryable SQL tables using xref:reference:sql/sql-statements/create-table.adoc[CREATE TABLE]. + +== Syntax + +[source,sql] +---- +CREATE REDPANDA CATALOG [IF NOT EXISTS] catalog_name +WITH (option = 'value' [, ...]); +---- + +* `catalog_name`: Name for the new catalog connection. +* `IF NOT EXISTS`: Optional. Prevents an error if a catalog with the same name already exists. + +== Options + +[cols="<30%,<15%,<10%,<45%",options="header"] +|=== +|Option |Type |Required |Description + +|`initial_brokers` +|STRING +|Yes +|Bootstrap broker address(es) for the Redpanda cluster. + +|`schema_registry_url` +|STRING +|Yes +|URL of the Schema Registry endpoint. + +|`sasl_mechanism` +|STRING +|No +|SASL authentication mechanism (for example, `SCRAM-SHA-256`). + +|`sasl_username` +|STRING +|No +|SASL username. + +|`sasl_password` +|STRING +|No +|SASL password. + +|`tls_enabled` +|STRING +|No +|Enable TLS for broker connections (`true` or `false`). + +|`schema_registry_username` +|STRING +|No +|Schema Registry authentication username. + +|`schema_registry_password` +|STRING +|No +|Schema Registry authentication password. 
+|=== + +== Examples + +=== Create a basic catalog + +[source,sql] +---- +CREATE REDPANDA CATALOG my_catalog +WITH ( + initial_brokers = 'broker1:9092', + schema_registry_url = 'http://schema-registry:8081' +); +---- + +=== Create a catalog with authentication + +[source,sql] +---- +CREATE REDPANDA CATALOG my_secure_catalog +WITH ( + initial_brokers = 'broker1:9092', + schema_registry_url = 'https://schema-registry:8081', + sasl_mechanism = 'SCRAM-SHA-256', + sasl_username = 'admin', + sasl_password = 'secret', + tls_enabled = 'true', + schema_registry_username = 'sr_user', + schema_registry_password = 'sr_pass' +); +---- diff --git a/modules/reference/pages/sql/sql-statements/create-storage.adoc b/modules/reference/pages/sql/sql-statements/create-storage.adoc new file mode 100644 index 000000000..b5660288a --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/create-storage.adoc @@ -0,0 +1,53 @@ += CREATE STORAGE +:description: The CREATE STORAGE statement creates a connection to external object storage for use with Redpanda catalogs. +:page-topic-type: reference + +The `CREATE STORAGE` statement creates a named connection to external object storage such as Amazon S3. + +== Syntax + +[source,sql] +---- +CREATE STORAGE [IF NOT EXISTS] storage_name +TYPE = S3 +WITH (option = 'value' [, ...]); +---- + +* `storage_name`: Name for the new storage connection. +* `TYPE`: Storage type. Redpanda SQL currently supports only `S3`. +* `IF NOT EXISTS`: Optional. Prevents an error if a storage connection with the same name already exists. + +== Options + +[cols="<30%,<15%,<10%,<45%",options="header"] +|=== +|Option |Type |Required |Description + +|`region` +|STRING +|Yes +|Cloud region for the storage bucket (for example, `us-west-2`). + +|`access_key_id` +|STRING +|Yes +|AWS access key ID. + +|`secret_access_key` +|STRING +|Yes +|AWS secret access key. 
+|=== + +== Examples + +[source,sql] +---- +CREATE STORAGE my_s3_storage +TYPE = S3 +WITH ( + region = 'us-west-2', + access_key_id = 'AKIAIOSFODNN7EXAMPLE', + secret_access_key = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' +); +---- diff --git a/modules/reference/pages/sql/sql-statements/create-table.adoc b/modules/reference/pages/sql/sql-statements/create-table.adoc new file mode 100644 index 000000000..bcd8bdf6a --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/create-table.adoc @@ -0,0 +1,105 @@ += CREATE TABLE +:description: The CREATE TABLE statement maps a Redpanda topic to a SQL table through a catalog, making topic data queryable with SQL. +:page-topic-type: reference + +The `CREATE TABLE` statement maps a Redpanda topic to a SQL table through a catalog. After creating the table, you can query topic data using standard SQL. + +NOTE: You must first xref:reference:sql/sql-statements/create-redpanda-catalog.adoc[create a Redpanda catalog connection] before creating tables. `CREATE TABLE` in Redpanda SQL maps Redpanda topics to SQL tables — it does not create standalone tables with user-defined schemas. + +== Syntax + +[source,sql] +---- +CREATE TABLE [IF NOT EXISTS] catalog_name=>table_name +WITH (option = 'value' [, ...]); +---- + +* `catalog_name`: Name of an existing Redpanda catalog. +* `table_name`: Name for the new table. +* `IF NOT EXISTS`: Optional. Prevents an error if a table with the same name already exists in the catalog. + +== Options + +[cols="<30%,<15%,<10%,<45%",options="header"] +|=== +|Option |Type |Required |Description + +|`topic` +|STRING +|Yes +|Name of the Redpanda topic to map to this table. + +|`schema_subject` +|STRING +|No +|Schema Registry subject name to use for deserializing topic data. + +|`schema_lookup_policy` +|STRING +|No +|How to resolve the schema version. Only `LATEST` is supported. + +|`error_handling_policy` +|STRING +|No +a|How to handle records that fail deserialization. 
+ +* `FAIL` (default): Raises an error. +* `FILL_NULL`: Replaces failed fields with NULL. +* `DROP_RECORD`: Skips the record. + +|`struct_mapping_policy` +|STRING +|No +a|How to map nested structures to SQL columns. + +* `JSON` (default): Stores nested data as JSON. +* `FLATTEN`: Expands nested fields into top-level columns. +* `COMPOUND`: Maps to ROW types. +* `VARIANT`: Stores as a variant type. + +|`output_schema_message_full_name` +|STRING +|No +|Full Protobuf message name. Required when the schema contains multiple message definitions. +|=== + +== Examples + +=== Create a basic table + +Map the `transactions` topic to a table through `my_catalog`: + +[source,sql] +---- +CREATE TABLE my_catalog=>transactions +WITH (topic = 'transactions'); +---- + +=== Specify a Schema Registry subject + +Map a topic and specify the Schema Registry subject: + +[source,sql] +---- +CREATE TABLE my_catalog=>user_events +WITH ( + topic = 'user-events', + schema_subject = 'user-events-value', + schema_lookup_policy = 'LATEST' +); +---- + +=== Create a table with error handling + +Map a topic and skip records that fail deserialization: + +[source,sql] +---- +CREATE TABLE IF NOT EXISTS my_catalog=>sensor_readings +WITH ( + topic = 'sensor-data', + schema_subject = 'sensor-data-value', + error_handling_policy = 'DROP_RECORD' +); +---- diff --git a/modules/reference/pages/sql/sql-statements/describe.adoc b/modules/reference/pages/sql/sql-statements/describe.adoc new file mode 100644 index 000000000..3b36ddbf3 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/describe.adoc @@ -0,0 +1,128 @@ += DESCRIBE +:description: The DESCRIBE statement displays columns of a table or lists tables in a database. +:page-topic-type: reference + +The `DESCRIBE` statement displays columns of a table or lists tables in a database. 
+ +== Syntax + +[source,sql] +---- +DESCRIBE DATABASE; +DESCRIBE TABLE table_name; +DESCRIBE TABLE catalog_name=>table_name; +DESCRIBE REDPANDA CATALOG catalog_name; +---- + +* `table_name`: Name of the table to describe. +* `catalog_name=>table_name`: Describes a table that is mapped to a Redpanda topic through a catalog. +* `catalog_name`: Name of a Redpanda catalog. Lists the tables and topic mappings in that catalog. + +[NOTE] +==== +This statement is available to all users with the `USAGE` privilege on the schema where the table is located. +==== + +== Examples + +=== Describe a table + +To show the columns of the `part` table, run the query: + +[source,sql] +---- +DESCRIBE TABLE part; +---- + +The query returns: + +[source,sql] +---- ++----------------+------------+-------------+-------+----------+ +| database_name | table_name | name | type | nullable | ++----------------+------------+-------------+-------+----------+ +| public | part | p_partkey | INT | f | +| public | part | p_name | TEXT | f | +| public | part | p_mfgr | TEXT | f | +| public | part | p_category | TEXT | f | +| public | part | p_brand | TEXT | f | +| public | part | p_color | TEXT | f | +| public | part | p_type | TEXT | f | +| public | part | p_size | INT | f | +| public | part | p_container | TEXT | f | ++----------------+------------+-------------+-------+----------+ +---- + +[TIP] +==== +Tables in this example use the `public` schema, the default in Redpanda SQL. For information on displaying tables from other schemas, see xref:reference:sql/schema.adoc[Schema]. 
+==== + +=== Describe a database + +To list all tables in the database, run: + +[source,sql] +---- +DESCRIBE DATABASE; +---- + +The query returns: + +[source,sql] +---- ++-----------------------------+ +| name | ++-----------------------------+ +| supplier_scale_1_no_index | +| features | +| orders | +| features2 | +| featurestable | +| featurestable1 | +| featurestable10 | ++-----------------------------+ +---- + +=== Describe a catalog table + +To view column details for a table mapped through a Redpanda catalog, run: + +[source,sql] +---- +DESCRIBE TABLE my_catalog=>transactions; +---- + +The query returns: + +[source,sql] +---- ++----------------+--------------+-------------+-------+----------+ +| database_name | table_name | name | type | nullable | ++----------------+--------------+-------------+-------+----------+ +| my_catalog | transactions | tx_id | INT | f | +| my_catalog | transactions | amount | REAL | f | +| my_catalog | transactions | created_at | TEXT | f | ++----------------+--------------+-------------+-------+----------+ +---- + +=== Describe a Redpanda catalog + +To list the tables and topic mappings in a Redpanda catalog, run: + +[source,sql] +---- +DESCRIBE REDPANDA CATALOG my_catalog; +---- + +The query returns: + +[source,sql] +---- ++----------------+--------------+ +| table_name | topic_name | ++----------------+--------------+ +| transactions | transactions | +| user_events | user-events | ++----------------+--------------+ +---- diff --git a/modules/reference/pages/sql/sql-statements/drop-redpanda-catalog.adoc b/modules/reference/pages/sql/sql-statements/drop-redpanda-catalog.adoc new file mode 100644 index 000000000..93bf456b9 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/drop-redpanda-catalog.adoc @@ -0,0 +1,32 @@ += DROP REDPANDA CATALOG +:description: The DROP REDPANDA CATALOG statement removes a Redpanda catalog connection. 
+:page-topic-type: reference + +The `DROP REDPANDA CATALOG` statement removes a Redpanda catalog connection. You must drop all tables within a catalog before dropping the catalog itself. + +== Syntax + +[source,sql] +---- +DROP REDPANDA CATALOG [IF EXISTS] catalog_name; +---- + +* `catalog_name`: Name of the catalog to remove. +* `IF EXISTS`: Optional. Prevents an error if the catalog does not exist. + +== Examples + +First, drop all tables in the catalog: + +[source,sql] +---- +DROP TABLE my_catalog=>user_events; +DROP TABLE my_catalog=>transactions; +---- + +Then drop the catalog: + +[source,sql] +---- +DROP REDPANDA CATALOG my_catalog; +---- diff --git a/modules/reference/pages/sql/sql-statements/drop-storage.adoc b/modules/reference/pages/sql/sql-statements/drop-storage.adoc new file mode 100644 index 000000000..6ba8eb644 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/drop-storage.adoc @@ -0,0 +1,22 @@ += DROP STORAGE +:description: The DROP STORAGE statement removes a named storage definition. +:page-topic-type: reference + +The `DROP STORAGE` statement removes a named storage definition. + +== Syntax + +[source,sql] +---- +DROP STORAGE [IF EXISTS] storage_name; +---- + +* `storage_name`: Name of the storage definition to remove. +* `IF EXISTS`: Optional. Prevents an error if the storage definition does not exist. + +== Examples + +[source,sql] +---- +DROP STORAGE my_s3_storage; +---- diff --git a/modules/reference/pages/sql/sql-statements/drop-table.adoc b/modules/reference/pages/sql/sql-statements/drop-table.adoc new file mode 100644 index 000000000..16477679d --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/drop-table.adoc @@ -0,0 +1,28 @@ += DROP TABLE +:description: The DROP TABLE statement removes a table that was mapped to a Redpanda topic through a catalog. +:page-topic-type: reference + +The `DROP TABLE` statement removes a table that was mapped to a Redpanda topic through a catalog. 
This does not delete the underlying Redpanda topic. + +== Syntax + +[source,sql] +---- +DROP TABLE [IF EXISTS] catalog_name=>table_name; +---- + +* `catalog_name`: Name of the Redpanda catalog containing the table. +* `table_name`: Name of the table to remove. +* `IF EXISTS`: Optional. Prevents an error if the table does not exist. + +== Examples + +[source,sql] +---- +DROP TABLE my_catalog=>transactions; +---- + +[source,sql] +---- +DROP TABLE IF EXISTS my_catalog=>old_events; +---- diff --git a/modules/reference/pages/sql/sql-statements/index.adoc b/modules/reference/pages/sql/sql-statements/index.adoc new file mode 100644 index 000000000..e7dd6068d --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/index.adoc @@ -0,0 +1,30 @@ += SQL statements +:description: SQL statements are the commands used to interact with Redpanda SQL. +:page-topic-type: reference + +SQL statements are commands used to interact with Redpanda SQL. These statements let you query data, manage catalog connections, and export results. + +NOTE: Redpanda SQL operates in read-only mode. Data mutation operations (`INSERT`, `UPDATE`, `DELETE`) and bulk import (`COPY FROM`) are not available. Data is ingested into Redpanda topics and made queryable through catalog mappings. + +The following table summarizes the statements supported by Redpanda SQL: + +[cols="<48%,<52%",options="header"] +|=== +|Statement |Description +|xref:reference:sql/sql-statements/alter-redpanda-catalog.adoc[ALTER REDPANDA CATALOG] |Modifies connection properties of an existing Redpanda catalog. +|xref:reference:sql/sql-statements/alter-storage.adoc[ALTER STORAGE] |Modifies credentials or configuration of an existing storage connection. +|xref:reference:sql/sql-statements/alter-table.adoc[ALTER TABLE] |Modifies options of a catalog table mapped to a Redpanda topic. +|xref:reference:sql/sql-statements/create-redpanda-catalog.adoc[CREATE REDPANDA CATALOG] |Creates a named connection to a Redpanda cluster. 
+|xref:reference:sql/sql-statements/create-storage.adoc[CREATE STORAGE] |Creates a connection to external object storage. +|xref:reference:sql/sql-statements/create-table.adoc[CREATE TABLE] |Maps a Redpanda topic to a SQL table through a catalog. +|xref:reference:sql/sql-statements/drop-table.adoc[DROP TABLE] |Removes a catalog table mapping. Does not delete the underlying Redpanda topic. +|xref:reference:sql/sql-statements/drop-redpanda-catalog.adoc[DROP REDPANDA CATALOG] |Removes a Redpanda catalog connection. +|xref:reference:sql/sql-statements/drop-storage.adoc[DROP STORAGE] |Removes a named storage definition. +|xref:reference:sql/sql-statements/select.adoc[SELECT] |Retrieves data from a table. +|xref:reference:sql/sql-statements/copy-to.adoc[COPY TO] |Exports query results or table data to CSV files. +|xref:reference:sql/sql-statements/set-show.adoc[SET/SHOW] |Configures or displays session-level settings such as time zone and search path. +|xref:reference:sql/sql-statements/show-execs.adoc[SHOW EXECS] |Displays currently running execution tasks across the cluster. +|xref:reference:sql/sql-statements/show-tables.adoc[SHOW TABLES] |Lists all tables within the current schema, database, or catalog. +|xref:reference:sql/sql-statements/show-nodes.adoc[SHOW NODES] |Displays the current state of nodes in a distributed cluster. +|xref:reference:sql/sql-statements/describe.adoc[DESCRIBE] |Shows detailed information about columns in a table, tables within a database, or catalog contents. +|=== diff --git a/modules/reference/pages/sql/sql-statements/keywords.adoc b/modules/reference/pages/sql/sql-statements/keywords.adoc new file mode 100644 index 000000000..688398b3d --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/keywords.adoc @@ -0,0 +1,472 @@ += Keywords +:description: In Redpanda SQL, reserved and non-reserved keywords play an important role in SQL syntax and usage. +:page-topic-type: reference + +Redpanda SQL has reserved and non-reserved keywords. 
Reserved keywords cannot be used as identifiers (such as table or column names) unless explicitly quoted. Examples include `SELECT`, `INSERT`, and `UPDATE`. + +Non-reserved keywords have special meanings only in specific contexts and can be used as identifiers in others. For example, `DB` is non-reserved, so you can use it as a database name. + +The following table lists all available keywords: + +[cols="2,^1,^1,2",options="header"] +|=== +|Keyword |Reserved |Non-reserved |Notes + +|ABSOLUTE | |✓ | +|ACTION | |✓ | +|ADD | |✓ | +|AFTER | |✓ | +|AGGREGATE | |✓ | +|ALL |✓ | | +|ALLOCATE |✓ | | +|ALTER | |✓ | +|ANALYSE |✓ | | +|ANALYZE |✓ | | +|AND |✓ | | +|ANY |✓ | | +|ANY_VALUE | |✓ | +|ARE |✓ | | +|ARRAY |✓ | |Requires AS +|ARRAY_MAX_CARDINALITY | |✓ | +|AS |✓ | |Requires AS +|ASC |✓ | | +|ASENSITIVE | |✓ | +|ASSERTION | |✓ | +|ASSIGNMENT | |✓ | +|ASYMMETRIC |✓ | | +|AT | |✓ | +|ATOMIC | |✓ | +|AUTHORIZATION |✓ | |Can be function or type +|AVG | |✓ | +|BEFORE | |✓ | +|BEGIN | |✓ | +|BEGIN_FRAME | |✓ | +|BEGIN_PARTITION | |✓ | +|BETWEEN | |✓ |Cannot be function or type +|BIGINT | |✓ |Cannot be function or type +|BIT | |✓ |Cannot be function or type +|BIT_LENGTH |✓ | | +|BLOB | |✓ | +|BOOL | |✓ | +|BOOLEAN | |✓ |Cannot be function or type +|BOTH |✓ | | +|BY | |✓ | +|CACHE | |✓ | +|CALL |✓ | | +|CALLED |✓ | | +|CARDINALITY | |✓ | +|CASCADE |✓ | | +|CASCADED |✓ | | +|CASE |✓ | | +|CAST |✓ | | +|CATALOG | |✓ | +|CEILING | |✓ | +|CHAR | |✓ | +|CHAR_LENGTH | |✓ | +|CHARACTER | |✓ | +|CHARACTER_LENGTH | |✓ | +|CHECK |✓ | | +|CLASSIFIER | |✓ | +|CLOB | |✓ | +|CLOSE |✓ | | +|COALESCE |✓ | | +|COLLATE |✓ | | +|COLLATION |✓ | | +|COLLECT | |✓ | +|COLUMN |✓ | | +|COLUMNS | |✓ | +|COMMIT |✓ | | +|CONDITION |✓ | | +|CONNECT |✓ | | +|CONNECTION |✓ | | +|CONSTRAINT |✓ | | +|CONSTRAINTS | |✓ | +|CONTAINS | |✓ | +|CONTINUE |✓ | | +|CONTROL | |✓ | +|CONVERT | |✓ | +|COPY | |✓ | +|CORR | |✓ | +|CORRESPONDING |✓ | | +|COVAR_POP | |✓ | +|COVAR_SAMP | |✓ | +|CREATE |✓ | | +|CROSS |✓ | | 
+|CUBE |✓ | | +|CUME_DIST | |✓ | +|CURRENT |✓ | | +|CURRENT_USER |✓ | | +|CURRENT_ROLE |✓ | | +|CURSOR |✓ | | +|CYCLE |✓ | | +|DATABASE | |✓ | +|DATABASES | |✓ | +|DATALINK | |✓ | +|DATE | |✓ | +|DATETIME | |✓ | +|DAY | |✓ | +|DEALLOCATE |✓ | | +|DEC | |✓ | +|DECFLOAT | |✓ | +|DECIMAL | |✓ | +|DECLARE |✓ | | +|DEFAULT |✓ | | +|DEFERRABLE |✓ | | +|DEFERRED |✓ | | +|DEFINE | |✓ | +|DELETE |✓ | | +|DELTA | |✓ | +|DENSE_RANK | |✓ | +|DEREF | |✓ | +|DESC |✓ | | +|DESCRIBE |✓ | | +|DESCRIPTOR |✓ | | +|DETERMINISTIC |✓ | | +|DIAGNOSTICS |✓ | | +|DIRECT | |✓ | +|DISCONNECT |✓ | | +|DISTINCT |✓ | | +|DLNEWCOPY | |✓ | +|DLPREVIOUSCOPY | |✓ | +|DLURLCOMPLETE | |✓ | +|DLURLCOMPLETEONLY | |✓ | +|DLURLCOMPLETEWRITE | |✓ | +|DLURLPATH | |✓ | +|DLURLPATHONLY | |✓ | +|DLURLPATHWRITE | |✓ | +|DLURLSCHEME | |✓ | +|DLURLSERVER | |✓ | +|DLVALUE | |✓ | +|DO |✓ | | +|DOMAIN | |✓ | +|DOUBLE | |✓ | +|DROP |✓ | | +|DYNAMIC | |✓ | +|EACH |✓ | | +|ELEMENT | |✓ | +|ELSE |✓ | | +|EMPTY | |✓ | +|END |✓ | | +|END_FRAME | |✓ | +|END_PARTITION | |✓ | +|EQUALS | |✓ | +|ESCAPE |✓ | | +|EVERY |✓ | | +|EXCEPT |✓ | | +|EXCEPTION |✓ | | +|EXEC |✓ | | +|EXECUTE |✓ | | +|EXISTS |✓ | | +|EXP | |✓ | +|EXPLAIN |✓ | | +|EXTERNAL |✓ | | +|EXTRACT |✓ | | +|FALSE |✓ | | +|FETCH |✓ | | +|FILE | |✓ | +|FILTER |✓ | | +|FIRST |✓ | | +|FIRST_VALUE | |✓ | +|FLOAT | |✓ | +|FLOOR | |✓ | +|FOR |✓ | | +|FOREIGN |✓ | | +|FORMAT | |✓ | +|FOUND | |✓ | +|FRAME_ROW | |✓ | +|FREE | |✓ | +|FROM |✓ | | +|FULL |✓ | | +|FUNCTION |✓ | | +|FUSION | |✓ | +|GET | |✓ | +|GLOBAL |✓ | | +|GO | |✓ | +|GOTO | |✓ | +|GRANT |✓ | | +|GROUP |✓ | | +|GROUPING |✓ | | +|GROUPS | |✓ | +|HASH | |✓ | +|HAVING |✓ | | +|HINT | |✓ | +|HOLD | |✓ | +|HOUR | |✓ | +|IDENTITY |✓ | | +|IF |✓ | | +|ILIKE | |✓ | +|IMMEDIATE |✓ | | +|IMPORT | |✓ | +|IN |✓ | | +|INDEX |✓ | | +|INDICATOR |✓ | | +|INITIAL |✓ | | +|INITIALLY |✓ | | +|INNER |✓ | | +|INOUT |✓ | | +|INPUT |✓ | | +|INSENSITIVE |✓ | | +|INSERT |✓ | | +|INT | |✓ | +|INTEGER | |✓ | +|INTERSECT |✓ | | 
+|INTERSECTION | |✓ | +|INTERVAL |✓ | | +|INTO |✓ | | +|IS |✓ | | +|ISNULL | |✓ | +|ISOLATION |✓ | | +|JOIN |✓ | | +|JSON | |✓ | +|JSON_ARRAY | |✓ | +|JSON_ARRAYAGG | |✓ | +|JSON_EXISTS | |✓ | +|JSON_OBJECT | |✓ | +|JSON_OBJECTAGG | |✓ | +|JSON_QUERY | |✓ | +|JSON_TABLE | |✓ | +|JSON_TABLE_PRIMITIVE | |✓ | +|JSON_VALUE | |✓ | +|JSONB | |✓ | +|KEY | |✓ | +|LAG | |✓ | +|LANGUAGE |✓ | | +|LARGE | |✓ | +|LAST |✓ | | +|LAST_VALUE | |✓ | +|LATERAL |✓ | | +|LEAD | |✓ | +|LEADING |✓ | | +|LEFT |✓ | | +|LEVEL | |✓ | +|LIKE |✓ | | +|LIKE_REGEX | |✓ | +|LIMIT |✓ | | +|LISTAGG | |✓ | +|LN | |✓ | +|LOAD | |✓ | +|LOCAL |✓ | | +|LOCALTIME |✓ | | +|LOCALTIMESTAMP |✓ | | +|LONG | |✓ | +|MEASURES | |✓ | +|MEMBER | |✓ | +|MERGE |✓ | | +|METHOD | |✓ | +|MINUS |✓ | | +|MINUTE | |✓ | +|MODIFIES |✓ | | +|MODULE | |✓ | +|MONTH | |✓ | +|MULTISET | |✓ | +|NAMES | |✓ | +|NATIONAL | |✓ | +|NATURAL |✓ | | +|NCHAR | |✓ | +|NCLOB | |✓ | +|NEW |✓ | | +|NEXT | |✓ | +|NO |✓ | | +|NONE | |✓ | +|NOT |✓ | | +|NTILE | |✓ | +|NULL |✓ | | +|NULLIF |✓ | | +|NULLS |✓ | | +|NVARCHAR | |✓ | +|OCCURRENCES_REGEX | |✓ | +|OCTET_LENGTH | |✓ | +|OF |✓ | | +|OFF | |✓ | +|OFFSET |✓ | | +|OLD |✓ | | +|OMIT | |✓ | +|ON |✓ | | +|ONE | |✓ | +|ONLY |✓ | | +|OPEN |✓ | | +|OPTION |✓ | | +|OR |✓ | | +|ORDER |✓ | | +|OUT |✓ | | +|OUTER |✓ | | +|OUTPUT |✓ | | +|OVER |✓ | | +|OVERLAPS |✓ | | +|OVERLAY | |✓ | +|PAD | |✓ | +|PARAMETER |✓ | | +|PARAMETERS | |✓ | +|PARTIAL |✓ | | +|PARTITION |✓ | | +|PATTERN | |✓ | +|PER | |✓ | +|PERCENT | |✓ | +|PERCENT_RANK | |✓ | +|PERCENTILE_CONT | |✓ | +|PERCENTILE_DISC | |✓ | +|PERIOD |✓ | | +|PERMUTE | |✓ | +|PLACING | |✓ | +|PLAN | |✓ | +|PORTION | |✓ | +|PRECEDES | |✓ | +|PRECISION |✓ | | +|PREPARE |✓ | | +|PRESERVE |✓ | | +|PRIMARY |✓ | | +|PRIOR |✓ | | +|PRIVILEGES | |✓ | +|PROCEDURE |✓ | | +|PTF | |✓ | +|PUBLIC |✓ | | +|RANGE |✓ | | +|READ |✓ | | +|READS |✓ | | +|REAL | |✓ | +|RECURSIVE |✓ | | +|REF |✓ | | +|REFERENCES |✓ | | +|REFERENCING |✓ | | +|REGR_AVGX | |✓ | +|REGR_AVGY | |✓ | 
+|REGR_COUNT | |✓ | +|REGR_INTERCEPT | |✓ | +|REGR_R2 | |✓ | +|REGR_SLOPE | |✓ | +|REGR_SXX | |✓ | +|REGR_SXY | |✓ | +|REGR_SYY | |✓ | +|RELATIVE | |✓ | +|RELEASE |✓ | | +|RENAME |✓ | | +|RESTRICT |✓ | | +|RESULT |✓ | | +|RETURN |✓ | | +|RETURNS |✓ | | +|REVOKE |✓ | | +|RIGHT |✓ | | +|ROLLBACK |✓ | | +|ROLLUP |✓ | | +|ROW |✓ | | +|ROW_NUMBER | |✓ | +|ROWS |✓ | | +|RUNNING | |✓ | +|SAVEPOINT |✓ | | +|SCHEMA |✓ | | +|SCHEMAS | |✓ | +|SCOPE |✓ | | +|SCROLL |✓ | | +|SEARCH | |✓ | +|SECOND | |✓ | +|SECTION | |✓ | +|SEEK | |✓ | +|SELECT |✓ | | +|SENSITIVE |✓ | | +|SESSION |✓ | | +|SESSION_USER |✓ | | +|SET |✓ | | +|SHOW | |✓ | +|SIMILAR | |✓ | +|SIZE | |✓ | +|SKIP | |✓ | +|SMALLINT | |✓ | +|SOME |✓ | | +|SORTED | |✓ | +|SPACE | |✓ | +|SPATIAL | |✓ | +|SPECIFIC |✓ | | +|SPECIFICTYPE | |✓ | +|SQL |✓ | | +|SQLCODE | |✓ | +|SQLERROR | |✓ | +|SQLEXCEPTION | |✓ | +|SQLSTATE | |✓ | +|SQLWARNING | |✓ | +|START |✓ | | +|STATIC |✓ | | +|STDDEV_POP | |✓ | +|STDDEV_SAMP | |✓ | +|STRING | |✓ | +|SUBMULTISET | |✓ | +|SUBSET | |✓ | +|SUCCEEDS | |✓ | +|SYMMETRIC |✓ | | +|SYSTEM |✓ | | +|SYSTEM_TIME | |✓ | +|SYSTEM_USER |✓ | | +|TABLE |✓ | | +|TABLES | |✓ | +|TABLESAMPLE |✓ | | +|TEMPORARY |✓ | | +|TEXT | |✓ | +|THEN |✓ | | +|TIME | |✓ | +|TIMESTAMP | |✓ | +|TIMESTAMP_TRUNC | |✓ | +|TO |✓ | | +|TOP | |✓ | +|TRAILING |✓ | | +|TRANSACTION |✓ | | +|TRANSLATE |✓ | | +|TRANSLATE_REGEX | |✓ | +|TRANSLATION | |✓ | +|TREAT |✓ | | +|TRIGGER |✓ | | +|TRUE |✓ | | +|TRUNCATE |✓ | | +|UESCAPE |✓ | | +|UNION |✓ | | +|UNIQUE |✓ | | +|UNKNOWN |✓ | | +|UNLOAD | |✓ | +|UNMATCHED | |✓ | +|UNNEST | |✓ | +|UPDATE |✓ | | +|UPPER | |✓ | +|USAGE |✓ | | +|USER | |✓ | +|USING |✓ | | +|VALUES |✓ | | +|VAR_POP | |✓ | +|VAR_SAMP | |✓ | +|VARBINARY | |✓ | +|VARCHAR | |✓ | +|VARIADIC |✓ | | +|VARYING |✓ | | +|VERSIONING | |✓ | +|VIEW |✓ | | +|VIRTUAL | |✓ | +|WHEN |✓ | | +|WHENEVER |✓ | | +|WHERE |✓ | | +|WIDTH_BUCKET | |✓ | +|WINDOW |✓ | | +|WITH |✓ | | +|WITHIN |✓ | | +|WITHOUT |✓ | | +|WORK |✓ | | +|WRITE | |✓ | 
+|XML | |✓ | +|XMLAGG | |✓ | +|XMLATTRIBUTES | |✓ | +|XMLBINARY | |✓ | +|XMLCAST | |✓ | +|XMLCOMMENT | |✓ | +|XMLCONCAT | |✓ | +|XMLDOCUMENT | |✓ | +|XMLELEMENT | |✓ | +|XMLEXISTS | |✓ | +|XMLFOREST | |✓ | +|XMLITERATE | |✓ | +|XMLNAMESPACES | |✓ | +|XMLPARSE | |✓ | +|XMLPI | |✓ | +|XMLQUERY | |✓ | +|XMLSERIALIZE | |✓ | +|XMLTABLE | |✓ | +|XMLTEXT | |✓ | +|XMLVALIDATE | |✓ | +|YEAR | |✓ | +|ZONE | |✓ | +|=== diff --git a/modules/reference/pages/sql/sql-statements/select.adoc b/modules/reference/pages/sql/sql-statements/select.adoc new file mode 100644 index 000000000..f0e6183fd --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/select.adoc @@ -0,0 +1,128 @@ += SELECT +:description: The SELECT statement retrieves data from one or more tables. +:page-topic-type: reference + +The `SELECT` statement retrieves data from one or more tables. Use `SELECT` to: + +* Retrieve specific columns from a table. +* Query data across multiple tables. +* Filter results based on specific criteria. + +== Syntax + +To retrieve data from a table, use this syntax: + +[source,sql] +---- +SELECT * FROM table_name; +---- + +To retrieve only specific columns, use: + +[source,sql] +---- +SELECT column1, column2, ... +FROM table_name; +---- + +Where: + +* `SELECT`: Specifies the data to retrieve. +* `*`: Returns all columns. +* `FROM`: Specifies the table to query. +* `table_name`: The name of the table. +* `column1, column2, ...`: The columns to retrieve. + +[NOTE] +==== +The `SELECT` statement is case-insensitive. `select` and `SELECT` produce the same result. +==== + +== Examples + +The following examples query a table named `student_data` that contains student records with `id`, `name`, and `domicile` columns. + +=== Query data from all columns + +. To display all the data from the `student_data` table, use this syntax: ++ +[source,sql] +---- +SELECT * FROM table_name; +---- + +. Run the following query: ++ +[source,sql] +---- +SELECT * FROM student_data; +---- + +. 
The query returns: ++ +[source,sql] +---- ++--------+----------+----------------+ +| id | name | domicile | ++--------+----------+----------------+ +| 119291 | Jordan | Los Angeles | +| 119292 | Mike | Melbourne | +| 119293 | Will | Sydney | ++--------+----------+----------------+ +---- + +=== Query data from specific columns + +. To get the list of students' names with their IDs, use this syntax: ++ +[source,sql] +---- +SELECT column_1, column_2 FROM table_name; +---- + +. Run: ++ +[source,sql] +---- +SELECT id, name FROM student_data; +---- + +. The query returns: ++ +[source,sql] +---- ++--------+----------+ +| id | name | ++--------+----------+ +| 119291 | Jordan | +| 119292 | Mike | +| 119293 | Will | ++--------+----------+ +---- + +=== Query data from a specific column with the condition + +. With a large amount of data, skimming for the desired data can take a long time. Apply conditions to the `SELECT` statement to narrow the results: ++ +[source,sql] +---- +SELECT column_1 FROM table_name WHERE condition; +---- + +. To find the student who lives in Sydney, run: ++ +[source,sql] +---- +SELECT name FROM student_data WHERE domicile='Sydney'; +---- + +. The query returns: ++ +[source,sql] +---- ++----------+ +| name | ++----------+ +| Will | ++----------+ +---- diff --git a/modules/reference/pages/sql/sql-statements/set-show.adoc b/modules/reference/pages/sql/sql-statements/set-show.adoc new file mode 100644 index 000000000..0dac3f447 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/set-show.adoc @@ -0,0 +1,156 @@ += SET and SHOW +:description: The SET statement configures session options. The SHOW statement displays their current values. +:page-topic-type: reference + +The `SET` statement configures session options. The `SHOW` statement displays their current values. + +== Syntax + +`SET` statement: + +[source,sql] +---- +SET