How I learned to stop worrying
and love Postgres-isms
PostgresConf US 2018
April 19, 2018
Jersey City, NJ
(jackdb.com)
pgjdbc (Java driver)
node-postgres (Node.js driver)
"PostgreSQL development aims for conformance with the latest official version of the standard where such conformance does not contradict traditional features or common sense."
https://www.postgresql.org/docs/current/static/features.html
No need to specify a relation for SELECT queries
-- Look ma, no FROM!
SELECT 'test' AS x;
  x
------
 test
(1 row)
Less typing and easy on the eyes
-- Using CAST(...) syntax:
SELECT CAST('2018-04-19' AS date);
-- Using :: syntax:
SELECT '2018-04-19'::date;
https://www.postgresql.org/docs/current/static/sql-expressions.html#SQL-SYNTAX-TYPE-CASTS
SELECT *
FROM conference c
WHERE c.year = 2018
AND c.name ~* '(postgresql)|(pgsql)'
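For reference, ~ is the case-sensitive variant and both have negated forms (!~ and !~*). A quick illustrative check:
SELECT 'PostgresConf' ~* 'postgres';  -- t
SELECT 'PostgresConf' ~  'postgres';  -- f (case differs)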
Simplifies quoting literals that include quotes
-- Regular single quotes:
SELECT 'This is a literal with a '' single quote';
-- Dollar quotes:
SELECT $$This is a literal with a ' single quote$$;
-- Tagged dollar quotes:
SELECT $tag$I can even include $$ in $$ quotes$tag$;
Great hack for importing a block of text as data[1]
SELECT
  t.fields[1] AS title,
  t.fields[2]::date AS start_date,
  t.fields[3]::date AS end_date,
  t.fields[4] AS city
FROM (
  SELECT regexp_split_to_array(line, ',') AS fields
  FROM regexp_split_to_table(
    $$PGConf US,2016-04-18,2016-04-20,New York City
PostgresConf US 2017,2017-03-28,2017-03-31,Jersey City
PostgresConf US 2018,2018-04-16,2018-04-20,Jersey City$$
  , '\n') AS line) t;
[1]: For a one-off data munge... Don't do this in an actual app.
https://www.postgresql.org/docs/current/static/transaction-iso.html
generate_series(...)
The best way to bootstrap sample data
-- Without a built-in generator:
SELECT x
FROM (SELECT ROWNUM AS x
      FROM some_big_table_i_hope_is_big_enough) t
WHERE x < 1000;
-- With built-in generate_series(...)
SELECT x
FROM generate_series(1, 100) x;
  x
-----
   1
   2
   3
 ...
(100 rows)
Combine with built-in operators for other types
SELECT now()::date - x AS some_date
FROM generate_series(1, 100) x;
 some_date
------------
 2018-04-18
 2018-04-17
 2018-04-16
 2018-04-15
 ...
(100 rows)
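generate_series(...) also works directly over timestamps with an interval step, skipping the cast-and-subtract dance. A small sketch (dates are illustrative):
SELECT x::date AS some_date
FROM generate_series(
       '2018-04-15'::timestamptz,
       '2018-04-18'::timestamptz,
       '1 day'::interval) x;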
LATERAL: like scalar subqueries but with more than one column
-- Most recent purchase per customer
SELECT
  c.id,
  c.email,
  p.transaction_date,
  p.total_price
FROM customer c
LEFT JOIN LATERAL (
  SELECT
    p.transaction_date,
    p.total_price
  FROM purchase p
  -- Filter on this specific customer's purchases:
  WHERE p.customer_id = c.id
  -- Get only the most recent transaction:
  ORDER BY p.transaction_date DESC
  LIMIT 1) p ON true;
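For following along at home, a minimal hypothetical schema the query above could run against (tables and columns are inferred from the query itself, not from the talk):
CREATE TABLE customer (
  id bigint PRIMARY KEY,
  email text NOT NULL
);
CREATE TABLE purchase (
  id bigint PRIMARY KEY,
  customer_id bigint NOT NULL REFERENCES customer (id),
  transaction_date date NOT NULL,
  total_price numeric NOT NULL
);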
-- Workaround with CASE ... WHEN ... END
SELECT
  SUM(CASE
        WHEN c.type = 'postgres' THEN 1 ELSE 0
      END) AS num_postgres,
  SUM(CASE
        WHEN c.type != 'postgres' THEN 1 ELSE 0
      END) AS num_non_postgres
FROM conference c
-- With filtered aggregates
SELECT
COUNT(*) FILTER (WHERE c.type = 'postgres')
AS num_postgres,
COUNT(*) FILTER (WHERE c.type != 'postgres')
AS num_non_postgres
FROM conference c
Part of the SQL:2003 standard, yet only supported by PostgreSQL
PostgreSQL includes many data types
boolean
text
uuid
date
timestamp / timestamptz
json / jsonb
inet / cidr
https://www.postgresql.org/docs/current/static/datatype.html
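A quick tour of the listed types as literals (values are illustrative only):
SELECT
  true AS a_boolean,
  'hello'::text AS some_text,
  '380a168a-7309-4f43-a3de-d89a86fd2284'::uuid AS a_uuid,
  '2018-04-19'::date AS a_date,
  now() AS a_timestamptz,
  '{"foo": 123}'::jsonb AS some_jsonb,
  '192.168.0.0/24'::cidr AS a_network;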
Simplify filtering in WHERE clauses
-- Using ints 1/0 for true/false
SELECT *
FROM foo f
WHERE f.is_active = 1
vs.
-- Using an actual boolean type
SELECT *
FROM foo f
WHERE f.is_active
Better signal-to-noise ratio for UPDATEs
-- Using ints 1/0 for true/false
UPDATE foo
SET is_active =
      CASE WHEN bar = 'xyz'
           THEN 1
           ELSE 0
      END;
vs.
-- Using an actual boolean type
UPDATE foo
SET is_active = (bar = 'xyz');
=> SELECT '' IS NULL;
 ?column?
----------
 f
(1 row)
[1]: Limited to 1 GB. If you get close to that size you've got other problems...
[2]: Though you probably should just use UTF-8
-- gen_random_uuid() comes from the pgcrypto extension
-- (here installed in its own "pgcrypto" schema):
=> SELECT pgcrypto.gen_random_uuid();
gen_random_uuid
--------------------------------------
380a168a-7309-4f43-a3de-d89a86fd2284
[1]: That's a >50% storage savings, which translates to more data fitting in the cache.
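To check the footnote's math yourself, compare the stored size of the same value as uuid vs text:
SELECT
  pg_column_size('380a168a-7309-4f43-a3de-d89a86fd2284'::uuid) AS uuid_bytes, -- 16
  pg_column_size('380a168a-7309-4f43-a3de-d89a86fd2284'::text) AS text_bytes; -- 37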
=> SELECT t.data->>'foo'
FROM (SELECT '{"foo": 123}'::json AS data) t;
?column?
----------
123
(1 row)
row_to_json(...)
and json_agg(...)
SELECT
  p.id,
  p.name,
  p.type,
  (SELECT
     -- Convert NULL to empty list:
     COALESCE(json_agg(t.*), '[]'::json)
   FROM (
     -- Sub-SELECT to get subset of keys we want:
     SELECT
       c.id,
       c.name
     FROM child c
     WHERE c.parent_id = p.id) t) AS children
FROM parent p
row_to_json(...)
and json_agg(...)
id | name | type | children
----+------+--------+------------------------
1 | foo | wizzle | [{"id":1,"name":"a"}, +
| | | {"id":2,"name":"b"}, +
| | | {"id":3,"name":"c"}]
2 | bar | wizzle | [{"id":4,"name":"d"}]
3 | baz | wozzle | []
(3 rows)
Only one round-trip from app to database.
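A hypothetical setup that reproduces the output above (names and values are inferred from the query and its result, not from the talk):
CREATE TABLE parent (
  id int PRIMARY KEY,
  name text NOT NULL,
  type text NOT NULL
);
CREATE TABLE child (
  id int PRIMARY KEY,
  parent_id int NOT NULL REFERENCES parent (id),
  name text NOT NULL
);
INSERT INTO parent VALUES
  (1, 'foo', 'wizzle'), (2, 'bar', 'wizzle'), (3, 'baz', 'wozzle');
INSERT INTO child VALUES
  (1, 1, 'a'), (2, 1, 'b'), (3, 1, 'c'), (4, 2, 'd');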
-- Account ids start with an "A" followed by 8 digits
CREATE DOMAIN account_id_type AS text
CONSTRAINT account_id_type_ck_format
CHECK (VALUE ~ '^A[0-9]{8}$');
-- Usernames must start with a letter and be alphanumeric
CREATE DOMAIN app_username AS text
CONSTRAINT app_username_ck_valid
CHECK (VALUE ~ '^[a-z][a-z0-9]{2,254}$');
-- Person ids start with a "P" followed by 15 digits
CREATE DOMAIN person_id_type AS text
CONSTRAINT person_id_type_ck_format
CHECK (VALUE ~ '^P[0-9]{15}$');
Define once then reuse in multiple places
CREATE TABLE person (
id person_id_type PRIMARY KEY,
username app_username,
name text NOT NULL
);
CREATE TABLE account (
id account_id_type PRIMARY KEY,
owner_id person_id_type NOT NULL REFERENCES person (id),
balance numeric NOT NULL
);
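Invalid values are rejected by the domain's CHECK. For example, a malformed person id:
INSERT INTO person (id, username, name)
VALUES ('X123', 'bob', 'Bob');
-- ERROR: value for domain person_id_type violates check
--        constraint "person_id_type_ck_format"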
Index and constrain only a subset of a table
CREATE TABLE menu_item (
id uuid PRIMARY KEY,
menu_date date NOT NULL,
name text NOT NULL,
price numeric NOT NULL,
is_daily_special boolean NOT NULL DEFAULT false
);
CREATE INDEX menu_item_ix_menu_date_name
ON menu_item (menu_date, name);
-- Only allow one item to be the daily special per day
CREATE UNIQUE INDEX menu_item_uk_menu_date_is_daily_special
ON menu_item (menu_date)
WHERE is_daily_special;
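A second daily special on the same date then fails the partial unique index. A sketch, assuming pgcrypto's gen_random_uuid() for the ids:
INSERT INTO menu_item (id, menu_date, name, price, is_daily_special)
VALUES (gen_random_uuid(), '2018-04-19', 'Soup of the day', 5.00, true);
INSERT INTO menu_item (id, menu_date, name, price, is_daily_special)
VALUES (gen_random_uuid(), '2018-04-19', 'Stew of the day', 7.00, true);
-- ERROR: duplicate key value violates unique constraint
--        "menu_item_uk_menu_date_is_daily_special"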
Allows for indexing an expression
CREATE UNIQUE INDEX foo_ix_lower_name
ON foo (LOWER(name));
=> EXPLAIN SELECT * FROM foo WHERE LOWER(name) = 'abc';
                       QUERY PLAN
-------------------------------------------------------
 Index Scan using foo_ix_lower_name on foo
     (cost=0.43..8.45 rows=1 width=18)
   Index Cond: (lower(name) = 'abc'::text)
BEGIN;
CREATE TABLE foo (/* ... */);
CREATE INDEX foo_ix_id ON foo (/* ... */);
INSERT INTO foo (/* ... */);
CREATE TABLE bar (/* ... */);
CREATE INDEX bar_ix_id ON bar (/* ... */);
CREATE INDEX bar_ix_foo_id ON bar (/* ... */);
INSERT INTO bar (/* ... */);
COMMIT; -- Or ROLLBACK!
[1]: Some limitations, such as altering enums, but most DDL is transactional.
The fastest way to export data
COPY (
SELECT s.*
FROM some_huge_table s
WHERE s.foo = 'bar'
AND s.baz = 123
) TO STDOUT
WITH (FORMAT CSV)
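From psql, the client-side \copy variant takes the same options but writes to a local file instead of the server connection's STDOUT ("export.csv" here is just an example name):
\copy (SELECT s.* FROM some_huge_table s WHERE s.foo = 'bar' AND s.baz = 123) TO 'export.csv' WITH (FORMAT CSV)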
"template1"
First create a backup:
CREATE DATABASE app_backup
  TEMPLATE app
  OWNER app_owner;
-- Drop the current app database:
DROP DATABASE app;
-- Create it again from our backup (note: creating from
-- a template requires no other active connections to it):
CREATE DATABASE app
  TEMPLATE app_backup
  OWNER app_owner;
How do we prevent two customers from booking the same room?
Partial approaches that don't quite work on their own:
CHECK: (start < end)
UNIQUE: (room_id, start)
SELECT FOR UPDATE
CREATE TABLE reservation (
id bigserial PRIMARY KEY,
room_id bigint NOT NULL REFERENCES room (id),
booking_dates daterange NOT NULL,
-- Prohibit overlapping bookings for the same room:
CONSTRAINT reservation_uk_room_id_booking_dates
EXCLUDE USING gist
(room_id WITH =, booking_dates WITH &&),
-- Ensure that the daterange is half-open:
CONSTRAINT reservation_ck_booking_dates_valid
CHECK ( lower_inc(booking_dates)
AND NOT upper_inc(booking_dates))
);
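One prerequisite the DDL above assumes: a gist opclass for plain bigint equality comes from the btree_gist extension, so create it first:
CREATE EXTENSION IF NOT EXISTS btree_gist;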
Available since v9.2 (released in 2012!)
-- [123456789 123456789 123456789 ]
-- [*********- ]
-- [ ********- ]
-- [ **********- ]
INSERT INTO reservation (room_id, booking_dates)
VALUES (1, '[2018-01-01,2018-01-10)');
INSERT INTO reservation (room_id, booking_dates)
VALUES (1, '[2018-01-10,2018-01-18)');
INSERT INTO reservation (room_id, booking_dates)
VALUES (1, '[2018-01-05,2018-01-15)');
ERROR: 23P01: conflicting key value violates exclusion
constraint "reservation_uk_room_id_booking_dates"
DETAIL: Key (room_id, booking_dates)=
(1, [2018-01-05,2018-01-15)) conflicts with existing
key (room_id, booking_dates)=(1, [2018-01-01,2018-01-10)).
SCHEMA NAME: public
TABLE NAME: reservation
CONSTRAINT NAME: reservation_uk_room_id_booking_dates
LOCATION: check_exclusion_or_unique_constraint, execIndexing.c:836
How to get a live view of all of this together?
CREATE EXTENSION postgres_fdw;
CREATE SERVER billing_db
FOREIGN DATA WRAPPER postgres_fdw
OPTIONS ( /* Server info for billing database */ );
CREATE USER MAPPING FOR PUBLIC
SERVER billing_db
OPTIONS ( /* Credentials for billing database */ );
CREATE FOREIGN TABLE billing_transaction (
id uuid NOT NULL,
customer_id uuid NOT NULL,
created_at timestamptz NOT NULL,
amount numeric NOT NULL
) SERVER billing_db;
CREATE EXTENSION redis_fdw;
CREATE SERVER redis_server
FOREIGN DATA WRAPPER redis_fdw
OPTIONS ( /* Server details for Redis */ );
CREATE USER MAPPING FOR PUBLIC
SERVER redis_server
OPTIONS ( /* Credentials for Redis */ );
CREATE FOREIGN TABLE redis_session (
key text,
value text)
SERVER redis_server;
SELECT
  /* ... Customer details ... */
  bt.total_amount,
  (s.detail->>'created_at')::timestamptz AS session_created_at,
  (s.detail->>'page_views')::int AS page_views
FROM (SELECT
        bt.customer_id,
        SUM(bt.amount) AS total_amount
      FROM billing_transaction bt
      WHERE bt.created_at > (now() - '30 days'::interval)
      GROUP BY bt.customer_id) bt
JOIN customer c ON c.id = bt.customer_id,
LATERAL (SELECT (s.value)::json AS detail
         FROM redis_session s
         WHERE s.key = ('customer:' || c.id)) s
WHERE (s.detail->>'page_views')::int > 10;
Don't crawl on N databases...
when you can run on one!
Sehrope Sarkuni
sehrope@jackdb.com