2. What is CQL?
* Cassandra Query Language (CQL)
* SQL-like language for communicating with Cassandra
* Simpler than the Thrift API
* An abstraction layer that hides implementation details
This is what we want to understand
5. TimeUUID
* Also known as a Version 1 UUID
* Sortable
Timestamp to Microsecond + UUID = TimeUUID
04d580b0-9412-11e3-baa8-0800200c9a66 = 12 February 2014 13:18:06 GMT
http://www.famkruithof.net/uuid/uuidgen
8. Clustering Column
Merged, Sorted and Stored Sequentially
04d580b0-9412-…9a66
2013-04-03 07:01:00
content: Hi!
sender: ali@example.tr
2013-04-03 07:03:20
content: Hello!
sender: tom@example…
2013-04-03 07:04:52
content: Where are you?
sender: ali@example.tr
2013-04-03 07:05:01
content: in Istanbul
sender: tom@example…
2013-04-03 07:06:32
content: wow! how come
sender: ali@example.tr
* Data on disk is ordered based on Clustering Column
* Efficient retrieval with range queries (slice)
-- Range (slice) query over one conversation, bounded on the clustering
-- column message_id. minTimeuuid()/maxTimeuuid() turn the two timestamps
-- into timeuuid bounds.
-- The conversation id is the one used by the other examples, written in full
-- and unquoted: CQL uuid constants are not string literals.
SELECT *
FROM messages
WHERE conversation_id = 04d580b0-9412-3a00-93d1-46196ee79a66
  AND message_id > minTimeuuid('2013-04-03 07:04:00')
  AND message_id < maxTimeuuid('2013-04-03 07:10:00');
9. Data on Disk
Partition Key (Row Key)
Column Name 1 Column Value 1
Column Name 2 Column Value 2
Column Name 3 Column Value 3
...
Column Name N Column Value N
10. Data on Disk
04d580b0-9412-3a00-93d1-46196ee79a66
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:content Hi!
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:sender ali@example.tr
2f3feb0f-9c24-11e2-7f7f-7f7f7f7f7f7f:
2f3feb0f-9c24-11e2-7f7f-7f7f7f7f7f7f:content Hello!
2f3feb0f-9c24-11e2-7f7f-7f7f7f7f7f7f:sender tom@example.com
...
Clustering Column (message_id) Column Name Column Value
Partition Key (conversation_id)
-- Insert one message (a clustered row) into a conversation partition.
-- uuid/timeuuid literals are unquoted and use plain ASCII hyphens only.
INSERT INTO messages (conversation_id, message_id, content, sender)
VALUES (
    04d580b0-9412-3a00-93d1-46196ee79a66,
    2f3feb0f-9c24-11e2-7f7f-7f7f7f7f7f7f,
    'Hello!',
    'tom@example.com'
);
11. Order of Clustering Keys
-- Messages of a conversation, kept in descending message_id order
-- within each partition.
CREATE TABLE messages (
    conversation_id uuid,      -- partition key
    message_id      timeuuid,  -- clustering column
    content         text,
    sender          text,
    PRIMARY KEY ((conversation_id), message_id)
) WITH CLUSTERING ORDER BY (message_id DESC);
* We only need the most recent N messages
* Storing messages in reverse TimeUUID order will speed up queries
12. Static Columns
-- Messages table extended with a per-conversation owner.
CREATE TABLE messages (
    conversation_id    uuid,         -- partition key
    conversation_owner text STATIC,  -- belongs to the conversation, not to a single message
    message_id         timeuuid,     -- clustering column
    content            text,
    sender             text,
    PRIMARY KEY ((conversation_id), message_id)
);
* Let’s add conversation owner (admin)
* Owner is related to conversation (Partition Key) not message (Clustering Key)
13. Static Columns
-- Set the owner of a conversation; only the partition key is given
-- because conversation_owner is a static column.
UPDATE messages
SET conversation_owner = 'ali@example.tr'
WHERE conversation_id = 04d580b0-9412-3a00-93d1-46196ee79a66;
* The same UPDATE on a non-static column will fail: a regular column also needs the clustering key (message_id) in the WHERE clause
15. Collections: Set
-- Messages table extended with the set of recipients of each message.
CREATE TABLE messages (
    conversation_id    uuid,
    conversation_owner text STATIC,
    message_id         timeuuid,
    content            text,
    sender             text,
    recipients         set<text>,  -- recipients of this message
    PRIMARY KEY ((conversation_id), message_id)
);
* We want to keep message recipients
* List of recipients may vary as people join and leave conversation
16. Collections: Set
-- Replace the recipients set of one message, addressed by its full primary key.
UPDATE messages
SET recipients = {'ali@example.tr', 'tom@example.com'}
WHERE conversation_id = 04d580b0-9412-3a00-93d1-46196ee79a66
  AND message_id = dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f;
17. Set on Disk
04d580b0-9412-3a00-93d1-46196ee79a66
:null:conversation_owner ali@example.tr
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:content Hi!
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:sender ali@example.tr
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:recipients:ali@example.tr
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:recipients:tom@example.com
2f3feb0f-9c24-11e2-7f7f-7f7f7f7f7f7f:
...
Set
18. Collections: Map
-- Messages table extended with attachments: a map from name to location (URI).
CREATE TABLE messages (
    conversation_id    uuid,
    conversation_owner text STATIC,
    message_id         timeuuid,
    content            text,
    sender             text,
    recipients         set<text>,
    attachments        map<text, text>,  -- attachment name -> URI
    PRIMARY KEY ((conversation_id), message_id)
);
* Let’s add attachments to message
* Each attachment would have name and location (URI)
19. Collections: Map
-- Replace the attachments map of one message (name -> URI entries).
UPDATE messages
SET attachments = {
    'picture.png': 'http://cdn.exmpl.com/1234.png',
    'audio.wav': 'http://cdn.exmpl.com/5678.wav'
}
WHERE conversation_id = 04d580b0-9412-3a00-93d1-46196ee79a66
  AND message_id = dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f;
20. Map on Disk
04d580b0-9412-3a00-93d1-46196ee79a66
:null:conversation_owner ali@example.tr
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:content Hi!
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:sender ali@example.tr
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:recipients:ali@example.tr
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:recipients:tom@example.com
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:attachments:picture.png http://cdn.exmpl.com/1234.png
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:attachments:audio.wav http://cdn.exmpl.com/5678.wav
2f3feb0f-9c24-11e2-7f7f-7f7f7f7f7f7f:
...
Map Name Key Value
21. Collections: List
-- Messages table extended with an ordered list of participants who have seen each message.
CREATE TABLE messages (
    conversation_id    uuid,
    conversation_owner text STATIC,
    message_id         timeuuid,
    content            text,
    sender             text,
    recipients         set<text>,
    attachments        map<text, text>,
    seen_by            list<text>,  -- list keeps element order
    PRIMARY KEY ((conversation_id), message_id)
);
* We want to know which participants have seen the message, and preserve the order in which they saw it
22. Collections: List
-- Replace the seen_by list of one message, addressed by its full primary key.
UPDATE messages
SET seen_by = ['adam@example.tr', 'tom@example.com']
WHERE conversation_id = 04d580b0-9412-3a00-93d1-46196ee79a66
  AND message_id = dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f;
23. List on Disk
04d580b0-9412-3a00-93d1-46196ee79a66
:null:conversation_owner ali@example.tr
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:content Hi!
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:sender ali@example.tr
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:recipients:ali@example.tr
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:recipients:tom@example.com
dbcd9d0f-...-7f7f-7f7f7f7f7f7f:seen_by:26017c10-f487-11e2-801f-df9895e5d0f8 adam@example.tr
dbcd9d0f-...-7f7f-7f7f7f7f7f7f:seen_by:26017c11-f487-11e2-801f-df9895e5d0f8 tom@example.com
2f3feb0f-9c24-11e2-7f7f-7f7f7f7f7f7f:
...
List Name Element ID (TimeUUID) Value
24. User Defined Types (UDT)
-- Messages table whose attachments carry structured metadata (the attachment UDT).
-- The attachment type must already exist when this statement is executed.
-- A user-defined type nested inside a collection has to be declared frozen;
-- a bare map<text,attachment> is rejected.
CREATE TABLE messages (
    conversation_id    uuid,
    conversation_owner text STATIC,
    message_id         timeuuid,
    content            text,
    sender             text,
    recipients         set<text>,
    seen_by            list<text>,
    attachments        map<text, frozen<attachment>>,  -- attachment name -> metadata
    PRIMARY KEY (conversation_id, message_id)
);
* New in Cassandra 2.1
* Let’s add more attributes to attachments
-- User-defined type describing a single attachment.
CREATE TYPE attachment (
    size int,
    mime text,
    uri  text
);
25. User Defined Types
-- Add one entry to the attachments map of a message; the map value is an
-- attachment UDT literal (field: value pairs).
UPDATE messages
SET attachments = attachments + {
    'picture.png': {
        size: 10240,
        mime: 'image/png',
        uri: 'http://cdn.exmpl.com/1234.png'
    }
}
WHERE conversation_id = 04d580b0-9412-3a00-93d1-46196ee79a66
  AND message_id = dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f;
26. UDT on Disk
04d580b0-9412-3a00-93d1-46196ee79a66
:null:conversation_owner ali@example.tr
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:content Hi!
dbcd9d0f-9c23-11e2-7f7f-7f7f7f7f7f7f:sender ali@example.tr
dbcd9d0f-...-7f7f7f7f7f7f:recipients:ali@example.tr
dbcd9d0f-...-7f7f7f7f7f7f:recipients:tom@example.com
dbcd9d0f-...-7f7f7f7f7f7f:attachments:picture.png 10240:'image/png':'http://cdn.exmpl.com/1234.png'
2f3feb0f-9c24-11e2-7f7f-7f7f7f7f7f7f:
...
Map Key UDT Value
27. Secondary Indexes
-- Messages table used for the secondary-index example
-- (attachments as a plain name -> URI map).
CREATE TABLE messages (
    conversation_id    uuid,
    conversation_owner text STATIC,
    message_id         timeuuid,
    content            text,
    sender             text,             -- column to be indexed for lookups by sender
    recipients         set<text>,
    seen_by            list<text>,
    attachments        map<text, text>,
    PRIMARY KEY ((conversation_id), message_id)
);
* What if we want to lookup messages by sender?
-- Secondary index on sender so messages can be looked up by who sent them.
CREATE INDEX sender_idx ON messages (sender);