PGConf NYC 2021
December 3, 2021(jackdb.com)
pgjdbc
(Java driver)node-postgres
(Node.js driver)Show me your flowchart and conceal your tables, and I shall continue to be mystified. Show me your tables, and I won't usually need your flowchart; it'll be obvious.
— Fred Brooks, The Mythical Man Month (1975)
Bad programmers worry about the code. Good programmers worry about data structures and their relationships.
— Linus Torvalds, LKML (2006)
Dumb schemas require smart code.
Smart schemas stop dumb code.
— Sehrope Sarkuni, PGConf NYC (2021)
Too slow ≈ Impossible
Smaller ≈ Faster
/* UUID stored as text,
e.g. "7c9a0778-566b-49a7-8743-205009b836aa" */
CREATE TABLE bad_idea (
id_as_text text
);
/* UUID stored as native type */
CREATE TABLE good_idea (
id_as_uuid uuid
);
INSERT INTO bad_idea
SELECT md5('' || x)::uuid::text
FROM generate_series(1, 1000000) x;
INSERT INTO good_idea
SELECT md5('' || x)::uuid
FROM generate_series(1, 1000000) x;
table_name | table_size
-----------+-----------
bad_idea | 65 MB
good_idea | 42 MB
Smaller and faster
INSERT INTO good_idea (id_as_uuid)
VALUES ('Not_A_UUID');
-- ERROR: invalid input syntax for type uuid: "Not_A_UUID"
Smaller, faster, and correct
CREATE TABLE bad_alignment (
a boolean,
b bigint,
c boolean,
d bigint,
e boolean,
f bigint,
g boolean,
h bigint
);
CREATE TABLE good_alignment (
b bigint,
d bigint,
f bigint,
h bigint,
a boolean,
c boolean,
e boolean,
g boolean
);
Column definition order matters
INSERT INTO bad_alignment
SELECT true, 1, true, 2, true, 3, true, 4
FROM generate_series(1, 1000000) x;
INSERT INTO good_alignment
SELECT 1, 2, 3, 4, true, true, true, true
FROM generate_series(1, 1000000) x;
table_name | table_size
----------------+------------
bad_alignment | 89 MB
good_alignment | 65 MB
Smaller rows ≈ More rows per page ≈ Faster
[12345678 12345678 12345678 12345678 12345678 12345678]
[B DDDDDDDD B DDDDDDDD B DDDDDDDD]
[DDDDDDDD DDDDDDDD DDDDDDDD BBB-----]
"B" = Boolean "DDDDDDDD" = Double "-" = Unused
(Go biggest to smallest)
And it's great at it
text
fields:uuid
int / bigint / numeric
date
timestamptz
json / jsonb
bytea
CREATE TABLE person (
id uuid PRIMARY KEY,
username text NOT NULL,
name text NOT NULL,
CONSTRAINT person_ck_username_pattern
CHECK (username ~* '^[a-z][a-z0-9_]+[a-z0-9]$'),
CONSTRAINT person_ck_name_length
CHECK (LENGTH(name) <= 255)
);
Check yourself before you wreck yourself
— Ice Cube, Check Yo' Self (1992)
INSERT INTO person (id, username, name) VALUES (
'b4e4b397-5104-f16c-0d4a-b95bab4a8eae'
'#bad#',
'Alice');
-- ERROR: new row for relation "person" violates
-- check constraint "person_ck_username_pattern"
INSERT INTO person (id, username, name)
SELECT 'b4e4b397-5104-f16c-0d4a-b95bab4a8eae' AS id,
'good' AS username,
repeat('too_long_', 100) AS name;
-- ERROR: new row for relation "person" violates
-- check constraint "person_ck_name_length"
Explicit naming of constraints helps too!
CREATE TABLE widget (
id uuid PRIMARY KEY,
manager_person_id bigint REFERENCES person(id),
manager_group_id bigint REFERENCES person(id)
CONSTRAINT widget_ck_manager
CHECK ( (manager_person_id IS NULL)
!= (manager_group_id IS NULL))
);
Enforce that exactly one type of manager field is populated.
SELECT '{foo,bar,baz,bam}'::text[]
SELECT '{{foo,bar},{baz,bam}}'::text[]
CREATE OR REPLACE FUNCTION array_sort(anyarray)
RETURNS anyarray AS
$BODY$
SELECT array_agg(elem ORDER BY elem)
FROM UNNEST($1) AS elem;
$BODY$
LANGUAGE SQL IMMUTABLE;
CREATE TABLE foo (
some_array text[] NOT NULL,
CONSTRAINT foo_ck_some_array_dim
CHECK (array_ndims(some_array) = 1),
CONSTRAINT foo_ck_some_array_order
CHECK (some_array = array_sort(some_array))
);
Enforce dimensions and sort order to guarantee it for all reads.
/* Create a backup */
CREATE DATABASE app_bk
TEMPLATE app
OWNER app_owner;
/* Run your DB migrations here ... */
/* No good? */
DROP DATABASE app;
CREATE DATABASE app
TEMPLATE app_bk
OWNER app_owner;
PostgreSQL guarantees consistency
(And consistency is always priority #1)
CREATE UNIQUE INDEX foo_uk_bar_id
ON foo (bar_id);
/* This succeeds */
INSERT INTO foo (bar_id) VALUES (1);
/* This fails */
INSERT INTO foo (bar_id) VALUES (1);
-- error: duplicate key value violates
-- unique constraint "foo_uk_bar_id"
PostgreSQL guarantees we cannot insert duplicates
(And consistency is always priority #1)
CREATE INDEX foo_ix_active_bar_id
ON foo (bar_id)
WHERE active;
SELECT t.*
FROM foo t
WHERE t.active
AND t.bar_id = 123
Only data matching the WHERE clause is indexed
(Smaller ≈ Faster)
CREATE INDEX foo_ix_a_including_b_c
ON foo (a)
INCLUDE (b, c);
/* Index-only scan is possible */
SELECT
t.a,
t.b,
t.c
FROM foo t
WHERE t.a = 123
Read non-key fields without going back to table
(Efficiency > Simplicity)
CREATE INDEX foo_ix_lower_name
ON foo (lower(name));
/* This will use our index */
SELECT t.*
FROM foo t
WHERE lower(name) = 'alice'
/* UNIQUE ensures we cannot insert duplicates */
CREATE UNIQUE INDEX foo_uk_lower_name_active
/* Indexed value is the lower case transformation */
ON foo (lower(name))
/* Only include rows which are active */
WHERE active;
PostgreSQL guarantees we cannot insert active duplicates with the same lower case name
(And consistency is always priority #1)
CREATE VIEW active_account_detail AS
SELECT
a.id,
a.manager_id,
p.name AS manager_name,
a.department_id,
d.name AS department_name,
a.created_at
FROM account a
JOIN person p ON p.id = a.manager_id
JOIN department d ON d.id = a.department_id
WHERE a.is_active
CREATE VIEW recent_sales AS
SELECT
s.id,
s.sale_date,
s.sale_price,
s.quantity,
s.product_id,
p.name AS product_name,
p.unit_price,
s.customer_id,
c.name AS customer_name
FROM generate_series(0, 30) x
JOIN sale s ON s.sale_date = (now()::date - x)
JOIN product p ON p.id = s.product_id
JOIN customer c ON c.id = s.customer_id
CREATE FUNCTION accessible_product (IN p_person_id int)
RETURNS SETOF product
LANGUAGE SQL
AS
$BODY$
SELECT p.*
FROM product p
WHERE p.owner_id = p_person_id
OR EXISTS(
SELECT 1
FROM product_group pg
JOIN person_group x ON x.group_id = pg.group_id
WHERE pg.product_id = p.id
AND x.person_id = p_person_id)
$BODY$
Embed parameters and hide details
How to prevent overlapping bookings of the same room?
start
and end
CHECK
: (start < end)
UNIQUE
: (room_id, start)
SELECT FOR UPDATE
CREATE TABLE reservation (
id bigserial PRIMARY KEY,
room_id bigint NOT NULL REFERENCES room (id),
booking_dates daterange NOT NULL,
-- Prohibit overlapping bookings for the same room:
CONSTRAINT reservation_uk_room_id_booking_dates
EXCLUDE USING gist
(room_id WITH =, booking_dates WITH &&),
-- Ensure that the daterange are half open
CONSTRAINT reservation_ck_booking_dates_valid
CHECK ( lower_inc(booking_dates)
AND NOT upper_inc(booking_dates))
);
PostgreSQL will ensure this constraint is enforced
(And consistency is always priority #1)Sehrope Sarkuni
sehrope@jackdb.com