This document summarizes how to set up and use Citus, an open-source PostgreSQL-based distributed database. It explains how to install Citus, add worker nodes, create distributed tables, and use features like reference tables to perform distributed queries across the cluster.
22. select * from sales where deptno=1;
deptno | deptname | total_amount
--------+-------------+--------------
1 | french_dept | 10000
-- Find the placement, on a primary node, of the shard that holds the rows of
-- "sales" whose distribution-column value is 1.
SELECT
    placement.shardid,
    placement.shardstate,
    placement.shardlength,
    node.nodename,
    node.nodeport,
    placement.placementid
FROM pg_dist_placement AS placement
INNER JOIN pg_dist_node AS node
    ON node.groupid = placement.groupid
WHERE node.noderole = 'primary'
    -- the subquery resolves the distribution value 1 to a single shard id
    AND placement.shardid = (
        SELECT get_shard_id_for_distribution_column('sales', 1)
    );
shardid | shardstate | shardlength | nodename | nodeport | placementid
---------+------------+-------------+----------------------+----------+-------------
102009 | 1 | 0 | dev-hanccitus003-ncl | 6432 | 2
23. -- create example table
CREATE TABLE products (
    store_id   bigint,
    product_id bigint,
    name       text,
    price      money,
    -- composite primary key; store_id comes first
    CONSTRAINT products_pkey PRIMARY KEY (store_id, product_id)
);
-- Distribute products, using store_id as the distribution column.
SELECT create_distributed_table('products', 'store_id');

-- Read back the distribution column of products: pg_dist_partition holds it
-- in partkey, and column_to_column_name converts that to the column's name.
SELECT column_to_column_name(p.logicalrelid, p.partkey) AS dist_col_name
FROM pg_dist_partition AS p
WHERE p.logicalrelid = 'products'::regclass;
dist_col_name
---------------
store_id
24.
25. installation
# Fetch the release tag v${CITUS_VER}; CITUS_VER must be set beforehand.
git clone -b v${CITUS_VER} https://github.com/citusdata/citus.git citus-v${CITUS_VER}
# Build inside the checkout: configure and the Makefile live in the cloned directory.
cd citus-v${CITUS_VER}
./configure
make
sudo make install
# edit the configuration
vi $PGDATA/postgresql.conf
shared_preload_libraries = 'citus'
vi $PGDATA/pg_hba.conf
sudo vi /etc/hosts
10.113.252.215 dev-hanccitus001-ncl.nfra.io dev-hanccitus001-ncl
10.113.252.111 dev-hanccitus002-ncl.nfra.io dev-hanccitus002-ncl
10.113.254.10 dev-hanccitus003-ncl.nfra.io dev-hanccitus003-ncl
10.113.255.8 dev-hanccitus004-ncl.nfra.io dev-hanccitus004-ncl
# restart
pg_ctl stop
pg_ctl start
SELECT * FROM pg_available_extensions WHERE name='citus';
name | default_version | installed_version | comment
-------+-----------------+-------------------+----------------------------
citus | 8.3-1 | 8.3-1 | Citus distributed database
SHOW shared_preload_libraries ;
shared_preload_libraries
--------------------------
citus
CREATE EXTENSION citus ;
postgres=# \dx
List of installed extensions
Name | Version | Schema | Description
---------+---------+------------+------------------------------
citus | 8.3-1 | pg_catalog | Citus distributed database
26. add worker nodes
-- Register each worker node (host name, port) in the cluster.
SELECT * FROM master_add_node('dev-hanccitus002-ncl', 6432);
SELECT * FROM master_add_node('dev-hanccitus003-ncl', 6432);
SELECT * FROM master_add_node('dev-hanccitus004-ncl', 6432);

-- List the worker nodes that are currently active.
SELECT * FROM master_get_active_worker_nodes();
node_name | node_port
----------------------+-----------
dev-hanccitus004-ncl | 6432
dev-hanccitus002-ncl | 6432
dev-hanccitus003-ncl | 6432
-- Example table: one row per department number (deptno is the primary key).
CREATE TABLE sales (
    deptno       int NOT NULL,
    deptname     varchar(20),
    total_amount int,
    CONSTRAINT pk_sales PRIMARY KEY (deptno)
);

-- Distribute sales, using deptno as the distribution column.
SELECT create_distributed_table('sales', 'deptno');
-- Seed ten sample rows (deptno 1 through 10), one statement per row.
INSERT INTO sales (deptno, deptname, total_amount) VALUES (1, 'french_dept', 10000);
INSERT INTO sales (deptno, deptname, total_amount) VALUES (2, 'german_dept', 15000);
INSERT INTO sales (deptno, deptname, total_amount) VALUES (3, 'china_dept', 21000);
INSERT INTO sales (deptno, deptname, total_amount) VALUES (4, 'gambia_dept', 8750);
INSERT INTO sales (deptno, deptname, total_amount) VALUES (5, 'japan_dept', 12010);
INSERT INTO sales (deptno, deptname, total_amount) VALUES (6, 'china_dept', 35000);
INSERT INTO sales (deptno, deptname, total_amount) VALUES (7, 'nigeria_dept', 10000);
INSERT INTO sales (deptno, deptname, total_amount) VALUES (8, 'senegal_dept', 33000);
INSERT INTO sales (deptno, deptname, total_amount) VALUES (9, 'korea_dept', 43000);
INSERT INTO sales (deptno, deptname, total_amount) VALUES (10, 'usa_dept', 5000);
create_distributed_table
27. explain
naverdb=> explain verbose select * from sales where deptno=2;
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0)
Output: remote_scan.deptno, remote_scan.deptname, remote_scan.total_amount
Task Count: 1
Tasks Shown: All
-> Task
Node: host=dev-hanccitus002-ncl port=6432 dbname=naverdb
-> Index Scan using pk_sales_102032 on appo.sales_102032 sales (cost=0.15..8.17 rows=1 width=66)
Output: deptno, deptname, total_amount
Index Cond: (sales.deptno = 2)
(9 rows)
Time: 3.367 ms
28. create_reference_table
-- Maps a network range (addrs) to a coordinate pair (latlon).
CREATE TABLE geo_ips (
    addrs  cidr  NOT NULL PRIMARY KEY,
    -- latlon[0] must lie in [-90, 90] and latlon[1] in [-180, 180]
    latlon point NOT NULL
        CHECK (-90 <= latlon[0] AND latlon[0] <= 90 AND
               -180 <= latlon[1] AND latlon[1] <= 180)
);

-- GiST index on addrs using the inet_ops operator class.
CREATE INDEX ON geo_ips USING gist (addrs inet_ops);

-- Turn geo_ips into a Citus reference table.
SELECT create_reference_table('geo_ips');
\copy geo_ips from 'geo_ips.csv' with csv
-- For the clicks of company 5 on ad 290, return the coordinates of the
-- geo_ips network range that contains each click's user_ip.
SELECT
    c.id,
    c.clicked_at,
    g.latlon
FROM clicks AS c
INNER JOIN geo_ips AS g
    ON g.addrs >> c.user_ip  -- >> : the network strictly contains the address
WHERE c.company_id = 5
    AND c.ad_id = 290;
id | clicked_at | latlon
------+---------------------+---------------------
3155 | 2017-03-16 03:56:00 | (42.3763,-85.4597)
3156 | 2017-06-10 09:44:11 | (34.0067,-118.3455)
3158 | 2017-02-11 18:40:11 | (4.5981,-74.0758)
3159 | 2017-05-27 22:38:18 | (42.2399,-83.1508)
3160 | 2017-02-27 07:48:24 | (30.0355,31.223)
3162 | 2017-05-30 14:01:24 | (46.0511,14.5051)
3163 | 2017-02-02 11:20:42 | (46.0511,14.5051)
3164 | 2017-01-22 08:51:16 | (30.0355,31.223)
3168 | 2017-01-12 05:40:53 | (46.0511,14.5051)
3169 | 2017-04-20 21:06:53 | (44.8784,-93.2793)
3171 | 2017-06-12 10:37:48 | (42.2399,-83.1508)
(11 rows)