SlideShare a Scribd company logo
1 of 84
Download to read offline
PRACTICAL 
CELERY
CAMERON MASKE 
twitter: @cameronmaske 
email: cam@trackmaven.com 
web: http://cameronmaske.com
WHAT WE'LL 
COVER...
WHAT IS CELERY? 
HOW DOES IT WORK?
USING CELERY, BEST 
PRACTICES AND SCALING.
SURVEY
CELERY 
ASYNCHRONOUS 
DISTRIBUTED 
TASK QUEUE
OUT OF THE 
REQUEST/RESPONSE 
CYCLE. 
Example: Sending emails asynchronously.
TASKS IN THE 
BACKGROUND. 
Example: Computationally heavy jobs. 
Example: Interacting with external APIs.
PERIODIC JOBS.
HISTORY 
Python. 
Released (0.1) in 2009. 
Currently on 3.1, with 3.2 in alpha. 
Developed by Ask Solem (@asksol)
ARCHITECTURE
PRODUCER 
Produces a task for the queue.
BROKER 
Stores the task backlog 
Answers, what work remains to be done? 
RabbitMQ, Redis, SQLAlchemy, Django's ORM, MongoDB...
WORKER 
Executes and consumes tasks. 
Distributed.
RESULTS BACKEND. 
Stores the results from our tasks. 
Redis, SQLAlchemy, Django's ORM, MongoDB... 
Optional!
EXAMPLE
from celery import Celery 
app = Celery('tasks', backend='amqp', broker='amqp://guest@localhost//') 
@app.task 
def add(x, y): 
return x + y
>>> result = add.delay(4, 4) 
>>> result.state 
'SUCCESS' 
>>> result.id 
'4cc7438e-afd4-4f8f-a2f3-f46567e7ca77' 
>>> result.get() 
8 
http://celery.readthedocs.org/en/latest/reference/celery.result.html
PICK YOUR FLAVOR. 
@app.task 
def add(x, y): 
return x + y 
add(2, 4) 
class AddTask(app.Task): 
def run(self, x, y): 
return x + y 
AddTask().run(2, 4)
# Async 
add.delay(2, 4) 
add.apply_aync(args=(2, 4), expires=30) 
# Eager! 
result = add.apply(args=(2, 4)) # Executes locally. 
# Or... 
add(2, 4) # Does not return a celery result!
INTEGRATING WITH 
DJANGO.
BEWARE OF DJANGO-CELERY.
http://docs.celeryproject.org/en/master/django/first-steps-with-django.html 
- project/ 
- config/__init__.py 
- config/settings.py 
- config/urls.py 
- manage.py
# project/config/celery.py 
from __future__ import absolute_import 
import os 
from celery import Celery 
from django.conf import settings 
# Set the default Django settings module for the 'celery' program. 
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings') 
app = Celery('app') 
# Using a string here means the worker will not have to 
# pickle the object when using Windows. 
app.config_from_object('django.conf:settings') 
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) 
@app.task(bind=True) 
def debug_task(self): 
print('Request: {0!r}'.format(self.request))
# project/config/__init__.py 
from __future__ import absolute_import 
# This will make sure the app is always imported when 
# Django starts so that shared_task will use this app. 
from .celery import app as celery_app 
__all__ = ['celery_app']
celery -A project worker -l info
TESTING 
# settings.py 
import sys 
if 'test' in sys.argv: 
CELERY_EAGER_PROPAGATES_EXCEPTIONS=True, 
CELERY_ALWAYS_EAGER=True, 
BROKER_BACKEND='memory'
PATTERNS 
AND BEST 
PRACTICES.
NEVER PASS OBJECTS AS 
ARGUMENTS.
# Bad 
@app.task() 
def send_reminder(reminder): 
reminder.send_email() 
# Good 
@app.task() 
def send_reminder(pk): 
try: 
reminder = Reminder.objects.get(pk=pk) 
except Reminder.DoesNotExist: 
return 
reminder.send_email()
KEEP TASKS GRANULAR. 
CAN PROCESS MORE IN 
PARALLEL.
AVOID LAUNCHING 
SYNCHRONOUS 
SUBTASKS
# Bad 
@app.task 
def update_page_info(url): 
page = fetch_page.delay(url).get() 
info = parse_page.delay(url, page).get() 
store_page_info.delay(url, info) 
@app.task 
def fetch_page(url): 
return myhttplib.get(url) 
@app.task 
def parse_page(url, page): 
return myparser.parse_document(page) 
@app.task 
def store_page_info(url, info): 
return PageInfo.objects.create(url, info)
# Good 
def update_page_info(url): 
chain = fetch_page.s() | parse_page.s() | store_page_info.s(url) 
chain() 
@app.task() 
def fetch_page(url): 
return myhttplib.get(url) 
@app.task() 
def parse_page(page): 
return myparser.parse_document(page) 
@app.task(ignore_result=True) 
def store_page_info(info, url): 
PageInfo.objects.create(url=url, info=info) 
http://celery.readthedocs.org/en/latest/userguide/canvas.html
PERIODIC TASKS. 
http://celery.readthedocs.org/en/latest/userguide/periodic-tasks.html
from datetime import timedelta 
@app.periodic_task(run_every=timedelta(minutes=5)): 
def run_every_five(): 
pass
from datetime import timedelta 
class RunEveryFive(app.PeriodicTask): 
run_every = timedelta(minutes=5) 
def run(self): 
pass
from datetime import timedelta 
@app.task(): 
def run_every_five(): 
pass 
CELERYBEAT_SCHEDULE = { 
'run-every-five': { 
'task': 'tasks.run_every_five', 
'schedule': timedelta(seconds=30) 
}, 
}
CRON STYLE. 
from celery.schedules import crontab 
crontab(minute=0, hour='*/3') # Every 3 hours. 
crontab(day_of_week='sunday') # Every minute on Sundays. 
crontab(0, 0, 0, month_of_year='*/3') # First month of every quarter.
@app.periodic_task(run_every=crontab(minute=0, hour=1)) 
def schedule_emails(): 
user_ids = User.objects.values_list('id', flat=True) 
for user_id in user_ids: 
send_daily_email.delay(user_id) 
@app.task() 
def send_daily_email(user_id): 
user = User.objects.get(id=user_id) 
try: 
today = datetime.now() 
Email.objects.get( 
user=user, date__year=today.year, date__month=today.month, date__day=today.day) 
except Email.DoesNotExist: 
email = Email(user=user, body="Hey, don't forget to LOGIN PLEASE!") 
email.send() 
email.save()
CELERY BEAT A.K.A THE 
SCHEDULER. 
celery -A project beat
NEVER RUN A BEAT + 
WORKER ON A SINGLE 
CELERY PROCESS. 
# Really bad idea.... 
celery -A project worker -B
FREQUENTLY RUNNING 
PERIODIC TASKS. 
BEWARE OF "TASK STACKING"
Schedule task runs every 5 minutes. 
Tasks take 30 minutes. 
Schedule task stacks. 
Bad stuff.
EXPIRES! 
from time import sleep 
@app.periodic_task(expires=5*60, run_every=timedelta(minutes=5)) 
def schedule_task(): 
for _ in range(30): 
one_minute_task.delay() 
@app.task(expires=5*60) 
def one_minute_task(): 
sleep(60)
THINGS GO WRONG IN 
TASKS!
RETRY
from celery.exceptions import Retry 
@app.task(max_retries=10) 
def gather_data(): 
try: 
data = api.get_data() 
# etc, etc, ... 
except api.RateLimited as e: 
raise Retry(exc=e, when=e.cooldown) 
except api.IsDown: 
return
ERROR INSIGHT.
SENTRY.
STAGES
class DebugTask(app.Task): 
def after_return(self, status, retval, task_id, args, kwargs, einfo): 
print("I'm done!") 
def on_failure(self, exc, task_id, args, kwargs, einfo): 
print("I failed :(") 
def on_retry(self, exc, task_id, args, kwargs, einfo): 
print("I'll try again!") 
def on_success(self, retval, task_id, args, kwargs): 
print("I did it!")
ABSTRACT 
class AbstractTask(app.Task): 
abstract = True 
def after_return(self, *args, **kwargs): 
print("All done!") 
@app.task(base=AbstractTask) 
def add(x, y): 
return x + y
INSTANTIATION 
class DatabaseTask(app.Task): 
abstract = True 
_db = None 
@property 
def db(self): 
if self._db is None: 
self._db = Database.connect() 
return self._db
ENSURE A TASK IS 
EXECUTED ONE AT A TIME
from celery import task 
from celery.utils.log import get_task_logger 
from django.core.cache import cache 
from django.utils.hashcompat import md5_constructor as md5 
from djangofeeds.models import Feed 
logger = get_task_logger(__name__) 
LOCK_EXPIRE = 60 * 5 # Lock expires in 5 minutes 
@task 
def import_feed(feed_url): 
# The cache key consists of the task name and the MD5 digest 
# of the feed URL. 
feed_url_digest = md5(feed_url).hexdigest() 
lock_id = '{0}-lock-{1}'.format(self.name, feed_url_hexdigest) 
# cache.add fails if the key already exists 
acquire_lock = lambda: cache.add(lock_id, 'true', LOCK_EXPIRE) 
# memcache delete is very slow, but we have to use it to take 
# advantage of using add() for atomic locking 
release_lock = lambda: cache.delete(lock_id) 
logger.debug('Importing feed: %s', feed_url) 
if acquire_lock(): 
try: 
feed = Feed.objects.import_feed(feed_url) 
finally: 
release_lock() 
return feed.url 
logger.debug( 
'Feed %s is already being imported by another worker', feed_url)
IMPORTANT SETTINGS
# settings.py 
CELERY_IGNORE_RESULT = True 
CELERYD_TASK_SOFT_TIME_LIMIT = 500 
CELERYD_TASK_TIME_LIMIT = 1000
# tasks.py 
app.task(ignore_result=True, soft_time_limit=60, time_limit=120) 
def add(x, y): 
pass
# settings.py 
CELERYD_MAX_TASKS_PER_CHILD = 500 
CELERYD_PREFETCH_MULTIPLIER = 4
BROKER
SO MANY 
CHOICES! 
RabbitMQ 
Redis 
SQLAlchemy 
Django's ORM 
MongoDB 
Amazon SQS 
CouchDB 
Beanstalk 
IronMQ
DJANGO ORM. 
# settings.py 
BROKER_URL = 'django://' 
INSTALLED_APPS = ( 
'kombu.transport.django', 
) CELERY_RESULT_BACKEND='djcelery.backends.database:DatabaseBackend' 
python manage.py syncdb
DON'T DO THIS FOR 
ANYTHING SERIOUS.
USE RABBITMQ
C OPTIMIZED LIBRARY 
$ pip install librabbitmq
WORKERS
CONCURRENCY 
celery -A project worker -C 10 
celery -A project worker --autoscale=10,1
INCREASED CONCURRENCY CAN 
QUICKLY DRAIN CONNECTIONS ON 
YOUR DATABASE 
Use a connection pooler (pgbouncer).
ROUTING
CELERY_ROUTES = { 
'email.tasks.send_mail': { 
'queue': 'priority', 
}, 
} 
# or 
send_mail.apply_async(queue="priority") 
celery -A project worker -Q email
DEDICATED WORKERS.
BOTTLENECKS
Identify 
Fix 
Repeat
Make tasks faster. 
Reduce volume of tasks.
NEWRELIC
MONITORING IS VITAL.
RABBITMQ MANAGEMENT 
PLUGIN
RABBITMQ MANAGEMENT PLUGIN 
HAS A GREAT HTTP API!
LIBRATO 
Gist!
CELERY FLOWER
QUESTIONS?

More Related Content

What's hot

Celery - A Distributed Task Queue
Celery - A Distributed Task QueueCelery - A Distributed Task Queue
Celery - A Distributed Task QueueDuy Do
 
Why Task Queues - ComoRichWeb
Why Task Queues - ComoRichWebWhy Task Queues - ComoRichWeb
Why Task Queues - ComoRichWebBryan Helmig
 
Europython 2011 - Playing tasks with Django & Celery
Europython 2011 - Playing tasks with Django & CeleryEuropython 2011 - Playing tasks with Django & Celery
Europython 2011 - Playing tasks with Django & CeleryMauro Rocco
 
Introduction to Python Celery
Introduction to Python CeleryIntroduction to Python Celery
Introduction to Python CeleryMahendra M
 
날로 먹는 Django admin 활용
날로 먹는 Django admin 활용날로 먹는 Django admin 활용
날로 먹는 Django admin 활용KyeongMook "Kay" Cha
 
Life in a Queue - Using Message Queue with django
Life in a Queue - Using Message Queue with djangoLife in a Queue - Using Message Queue with django
Life in a Queue - Using Message Queue with djangoTareque Hossain
 
Spring framework IOC and Dependency Injection
Spring framework  IOC and Dependency InjectionSpring framework  IOC and Dependency Injection
Spring framework IOC and Dependency InjectionAnuj Singh Rajput
 
Introduction to Redis
Introduction to RedisIntroduction to Redis
Introduction to RedisDvir Volk
 
Materialized Views and Secondary Indexes in Scylla: They Are finally here!
Materialized Views and Secondary Indexes in Scylla: They Are finally here!Materialized Views and Secondary Indexes in Scylla: They Are finally here!
Materialized Views and Secondary Indexes in Scylla: They Are finally here!ScyllaDB
 
More mastering the art of indexing
More mastering the art of indexingMore mastering the art of indexing
More mastering the art of indexingYoshinori Matsunobu
 
Let's read code: the python-requests library
Let's read code: the python-requests libraryLet's read code: the python-requests library
Let's read code: the python-requests librarySusan Tan
 
Introduction to memcached
Introduction to memcachedIntroduction to memcached
Introduction to memcachedJurriaan Persyn
 
Introduction to Apache ZooKeeper
Introduction to Apache ZooKeeperIntroduction to Apache ZooKeeper
Introduction to Apache ZooKeeperSaurav Haloi
 
Efficient Pagination Using MySQL
Efficient Pagination Using MySQLEfficient Pagination Using MySQL
Efficient Pagination Using MySQLEvan Weaver
 
Advanced Postgres Monitoring
Advanced Postgres MonitoringAdvanced Postgres Monitoring
Advanced Postgres MonitoringDenish Patel
 
Prometheus Celery Exporter
Prometheus Celery ExporterPrometheus Celery Exporter
Prometheus Celery ExporterFabio Todaro
 
Deep Dive In To Redis Replication: Vishy Kasar
Deep Dive In To Redis Replication: Vishy KasarDeep Dive In To Redis Replication: Vishy Kasar
Deep Dive In To Redis Replication: Vishy KasarRedis Labs
 

What's hot (20)

Django Celery
Django Celery Django Celery
Django Celery
 
Celery - A Distributed Task Queue
Celery - A Distributed Task QueueCelery - A Distributed Task Queue
Celery - A Distributed Task Queue
 
Why Task Queues - ComoRichWeb
Why Task Queues - ComoRichWebWhy Task Queues - ComoRichWeb
Why Task Queues - ComoRichWeb
 
Europython 2011 - Playing tasks with Django & Celery
Europython 2011 - Playing tasks with Django & CeleryEuropython 2011 - Playing tasks with Django & Celery
Europython 2011 - Playing tasks with Django & Celery
 
Introduction to Python Celery
Introduction to Python CeleryIntroduction to Python Celery
Introduction to Python Celery
 
날로 먹는 Django admin 활용
날로 먹는 Django admin 활용날로 먹는 Django admin 활용
날로 먹는 Django admin 활용
 
Life in a Queue - Using Message Queue with django
Life in a Queue - Using Message Queue with djangoLife in a Queue - Using Message Queue with django
Life in a Queue - Using Message Queue with django
 
Spring framework IOC and Dependency Injection
Spring framework  IOC and Dependency InjectionSpring framework  IOC and Dependency Injection
Spring framework IOC and Dependency Injection
 
Introduction to Redis
Introduction to RedisIntroduction to Redis
Introduction to Redis
 
Materialized Views and Secondary Indexes in Scylla: They Are finally here!
Materialized Views and Secondary Indexes in Scylla: They Are finally here!Materialized Views and Secondary Indexes in Scylla: They Are finally here!
Materialized Views and Secondary Indexes in Scylla: They Are finally here!
 
How to Design Indexes, Really
How to Design Indexes, ReallyHow to Design Indexes, Really
How to Design Indexes, Really
 
More mastering the art of indexing
More mastering the art of indexingMore mastering the art of indexing
More mastering the art of indexing
 
Let's read code: the python-requests library
Let's read code: the python-requests libraryLet's read code: the python-requests library
Let's read code: the python-requests library
 
Introduction to memcached
Introduction to memcachedIntroduction to memcached
Introduction to memcached
 
Introduction to Apache ZooKeeper
Introduction to Apache ZooKeeperIntroduction to Apache ZooKeeper
Introduction to Apache ZooKeeper
 
Flower and celery
Flower and celeryFlower and celery
Flower and celery
 
Efficient Pagination Using MySQL
Efficient Pagination Using MySQLEfficient Pagination Using MySQL
Efficient Pagination Using MySQL
 
Advanced Postgres Monitoring
Advanced Postgres MonitoringAdvanced Postgres Monitoring
Advanced Postgres Monitoring
 
Prometheus Celery Exporter
Prometheus Celery ExporterPrometheus Celery Exporter
Prometheus Celery Exporter
 
Deep Dive In To Redis Replication: Vishy Kasar
Deep Dive In To Redis Replication: Vishy KasarDeep Dive In To Redis Replication: Vishy Kasar
Deep Dive In To Redis Replication: Vishy Kasar
 

Similar to Practical Celery

Testing My Patience
Testing My PatienceTesting My Patience
Testing My PatienceAdam Lowry
 
Тестирование и Django
Тестирование и DjangoТестирование и Django
Тестирование и DjangoMoscowDjango
 
Crossing the Bridge: Connecting Rails and your Front-end Framework
Crossing the Bridge: Connecting Rails and your Front-end FrameworkCrossing the Bridge: Connecting Rails and your Front-end Framework
Crossing the Bridge: Connecting Rails and your Front-end FrameworkDaniel Spector
 
DjangoCon US 2011 - Monkeying around at New Relic
DjangoCon US 2011 - Monkeying around at New RelicDjangoCon US 2011 - Monkeying around at New Relic
DjangoCon US 2011 - Monkeying around at New RelicGraham Dumpleton
 
Djangocon11: Monkeying around at New Relic
Djangocon11: Monkeying around at New RelicDjangocon11: Monkeying around at New Relic
Djangocon11: Monkeying around at New RelicNew Relic
 
Deixa para depois, Procrastinando com Celery em Python
Deixa para depois, Procrastinando com Celery em PythonDeixa para depois, Procrastinando com Celery em Python
Deixa para depois, Procrastinando com Celery em PythonAdriano Petrich
 
Using Task Queues and D3.js to build an analytics product on App Engine
Using Task Queues and D3.js to build an analytics product on App EngineUsing Task Queues and D3.js to build an analytics product on App Engine
Using Task Queues and D3.js to build an analytics product on App EngineRiver of Talent
 
[Quase] Tudo que você precisa saber sobre tarefas assíncronas
[Quase] Tudo que você precisa saber sobre  tarefas assíncronas[Quase] Tudo que você precisa saber sobre  tarefas assíncronas
[Quase] Tudo que você precisa saber sobre tarefas assíncronasFilipe Ximenes
 
Tasks: you gotta know how to run them
Tasks: you gotta know how to run themTasks: you gotta know how to run them
Tasks: you gotta know how to run themFilipe Ximenes
 
Python magicmethods
Python magicmethodsPython magicmethods
Python magicmethodsdreampuf
 
AngularJS Tips&Tricks
AngularJS Tips&TricksAngularJS Tips&Tricks
AngularJS Tips&TricksPetr Bela
 
Flask patterns
Flask patternsFlask patterns
Flask patternsit-people
 
QConSP 2015 - Dicas de Performance para Aplicações Web
QConSP 2015 - Dicas de Performance para Aplicações WebQConSP 2015 - Dicas de Performance para Aplicações Web
QConSP 2015 - Dicas de Performance para Aplicações WebFabio Akita
 
Writing HTML5 Web Apps using Backbone.js and GAE
Writing HTML5 Web Apps using Backbone.js and GAEWriting HTML5 Web Apps using Backbone.js and GAE
Writing HTML5 Web Apps using Backbone.js and GAERon Reiter
 
Ansible for beginners ...?
Ansible for beginners ...?Ansible for beginners ...?
Ansible for beginners ...?shirou wakayama
 
JavaScript and UI Architecture Best Practices
JavaScript and UI Architecture Best PracticesJavaScript and UI Architecture Best Practices
JavaScript and UI Architecture Best PracticesSiarhei Barysiuk
 
Building Large jQuery Applications
Building Large jQuery ApplicationsBuilding Large jQuery Applications
Building Large jQuery ApplicationsRebecca Murphey
 
How and why i roll my own node.js framework
How and why i roll my own node.js frameworkHow and why i roll my own node.js framework
How and why i roll my own node.js frameworkBen Lin
 
Nevyn — Promise, It's Async! Swift Language User Group Lightning Talk 2015-09-24
Nevyn — Promise, It's Async! Swift Language User Group Lightning Talk 2015-09-24Nevyn — Promise, It's Async! Swift Language User Group Lightning Talk 2015-09-24
Nevyn — Promise, It's Async! Swift Language User Group Lightning Talk 2015-09-24Joachim Bengtsson
 

Similar to Practical Celery (20)

Testing My Patience
Testing My PatienceTesting My Patience
Testing My Patience
 
Тестирование и Django
Тестирование и DjangoТестирование и Django
Тестирование и Django
 
Crossing the Bridge: Connecting Rails and your Front-end Framework
Crossing the Bridge: Connecting Rails and your Front-end FrameworkCrossing the Bridge: Connecting Rails and your Front-end Framework
Crossing the Bridge: Connecting Rails and your Front-end Framework
 
DjangoCon US 2011 - Monkeying around at New Relic
DjangoCon US 2011 - Monkeying around at New RelicDjangoCon US 2011 - Monkeying around at New Relic
DjangoCon US 2011 - Monkeying around at New Relic
 
Djangocon11: Monkeying around at New Relic
Djangocon11: Monkeying around at New RelicDjangocon11: Monkeying around at New Relic
Djangocon11: Monkeying around at New Relic
 
Deixa para depois, Procrastinando com Celery em Python
Deixa para depois, Procrastinando com Celery em PythonDeixa para depois, Procrastinando com Celery em Python
Deixa para depois, Procrastinando com Celery em Python
 
Using Task Queues and D3.js to build an analytics product on App Engine
Using Task Queues and D3.js to build an analytics product on App EngineUsing Task Queues and D3.js to build an analytics product on App Engine
Using Task Queues and D3.js to build an analytics product on App Engine
 
[Quase] Tudo que você precisa saber sobre tarefas assíncronas
[Quase] Tudo que você precisa saber sobre  tarefas assíncronas[Quase] Tudo que você precisa saber sobre  tarefas assíncronas
[Quase] Tudo que você precisa saber sobre tarefas assíncronas
 
Tasks: you gotta know how to run them
Tasks: you gotta know how to run themTasks: you gotta know how to run them
Tasks: you gotta know how to run them
 
Python magicmethods
Python magicmethodsPython magicmethods
Python magicmethods
 
AngularJS Tips&Tricks
AngularJS Tips&TricksAngularJS Tips&Tricks
AngularJS Tips&Tricks
 
Flask patterns
Flask patternsFlask patterns
Flask patterns
 
QConSP 2015 - Dicas de Performance para Aplicações Web
QConSP 2015 - Dicas de Performance para Aplicações WebQConSP 2015 - Dicas de Performance para Aplicações Web
QConSP 2015 - Dicas de Performance para Aplicações Web
 
Writing HTML5 Web Apps using Backbone.js and GAE
Writing HTML5 Web Apps using Backbone.js and GAEWriting HTML5 Web Apps using Backbone.js and GAE
Writing HTML5 Web Apps using Backbone.js and GAE
 
Ansible for beginners ...?
Ansible for beginners ...?Ansible for beginners ...?
Ansible for beginners ...?
 
JavaScript and UI Architecture Best Practices
JavaScript and UI Architecture Best PracticesJavaScript and UI Architecture Best Practices
JavaScript and UI Architecture Best Practices
 
Play vs Rails
Play vs RailsPlay vs Rails
Play vs Rails
 
Building Large jQuery Applications
Building Large jQuery ApplicationsBuilding Large jQuery Applications
Building Large jQuery Applications
 
How and why i roll my own node.js framework
How and why i roll my own node.js frameworkHow and why i roll my own node.js framework
How and why i roll my own node.js framework
 
Nevyn — Promise, It's Async! Swift Language User Group Lightning Talk 2015-09-24
Nevyn — Promise, It's Async! Swift Language User Group Lightning Talk 2015-09-24Nevyn — Promise, It's Async! Swift Language User Group Lightning Talk 2015-09-24
Nevyn — Promise, It's Async! Swift Language User Group Lightning Talk 2015-09-24
 

Recently uploaded

"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek Schlawack
"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek Schlawack"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek Schlawack
"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek SchlawackFwdays
 
Story boards and shot lists for my a level piece
Story boards and shot lists for my a level pieceStory boards and shot lists for my a level piece
Story boards and shot lists for my a level piececharlottematthew16
 
Ensuring Technical Readiness For Copilot in Microsoft 365
Ensuring Technical Readiness For Copilot in Microsoft 365Ensuring Technical Readiness For Copilot in Microsoft 365
Ensuring Technical Readiness For Copilot in Microsoft 3652toLead Limited
 
Vertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsVertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsMiki Katsuragi
 
SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024Lorenzo Miniero
 
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks..."LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...Fwdays
 
Are Multi-Cloud and Serverless Good or Bad?
Are Multi-Cloud and Serverless Good or Bad?Are Multi-Cloud and Serverless Good or Bad?
Are Multi-Cloud and Serverless Good or Bad?Mattias Andersson
 
SAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptxSAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptxNavinnSomaal
 
AI as an Interface for Commercial Buildings
AI as an Interface for Commercial BuildingsAI as an Interface for Commercial Buildings
AI as an Interface for Commercial BuildingsMemoori
 
Developer Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQLDeveloper Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQLScyllaDB
 
Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024Enterprise Knowledge
 
My Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 PresentationMy Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 PresentationRidwan Fadjar
 
Vector Databases 101 - An introduction to the world of Vector Databases
Vector Databases 101 - An introduction to the world of Vector DatabasesVector Databases 101 - An introduction to the world of Vector Databases
Vector Databases 101 - An introduction to the world of Vector DatabasesZilliz
 
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)Mark Simos
 
Scanning the Internet for External Cloud Exposures via SSL Certs
Scanning the Internet for External Cloud Exposures via SSL CertsScanning the Internet for External Cloud Exposures via SSL Certs
Scanning the Internet for External Cloud Exposures via SSL CertsRizwan Syed
 
Search Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdfSearch Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdfRankYa
 
WordPress Websites for Engineers: Elevate Your Brand
WordPress Websites for Engineers: Elevate Your BrandWordPress Websites for Engineers: Elevate Your Brand
WordPress Websites for Engineers: Elevate Your Brandgvaughan
 
Training state-of-the-art general text embedding
Training state-of-the-art general text embeddingTraining state-of-the-art general text embedding
Training state-of-the-art general text embeddingZilliz
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsMark Billinghurst
 

Recently uploaded (20)

"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek Schlawack
"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek Schlawack"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek Schlawack
"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek Schlawack
 
Story boards and shot lists for my a level piece
Story boards and shot lists for my a level pieceStory boards and shot lists for my a level piece
Story boards and shot lists for my a level piece
 
Ensuring Technical Readiness For Copilot in Microsoft 365
Ensuring Technical Readiness For Copilot in Microsoft 365Ensuring Technical Readiness For Copilot in Microsoft 365
Ensuring Technical Readiness For Copilot in Microsoft 365
 
E-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptx
E-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptxE-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptx
E-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptx
 
Vertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsVertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering Tips
 
SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024
 
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks..."LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
 
Are Multi-Cloud and Serverless Good or Bad?
Are Multi-Cloud and Serverless Good or Bad?Are Multi-Cloud and Serverless Good or Bad?
Are Multi-Cloud and Serverless Good or Bad?
 
SAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptxSAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptx
 
AI as an Interface for Commercial Buildings
AI as an Interface for Commercial BuildingsAI as an Interface for Commercial Buildings
AI as an Interface for Commercial Buildings
 
Developer Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQLDeveloper Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQL
 
Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024
 
My Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 PresentationMy Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 Presentation
 
Vector Databases 101 - An introduction to the world of Vector Databases
Vector Databases 101 - An introduction to the world of Vector DatabasesVector Databases 101 - An introduction to the world of Vector Databases
Vector Databases 101 - An introduction to the world of Vector Databases
 
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
 
Scanning the Internet for External Cloud Exposures via SSL Certs
Scanning the Internet for External Cloud Exposures via SSL CertsScanning the Internet for External Cloud Exposures via SSL Certs
Scanning the Internet for External Cloud Exposures via SSL Certs
 
Search Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdfSearch Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdf
 
WordPress Websites for Engineers: Elevate Your Brand
WordPress Websites for Engineers: Elevate Your BrandWordPress Websites for Engineers: Elevate Your Brand
WordPress Websites for Engineers: Elevate Your Brand
 
Training state-of-the-art general text embedding
Training state-of-the-art general text embeddingTraining state-of-the-art general text embedding
Training state-of-the-art general text embedding
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR Systems
 

Practical Celery

  • 2. CAMERON MASKE twitter: @cameronmaske email: cam@trackmaven.com web: http://cameronmaske.com
  • 4. WHAT IS CELERY? HOW DOES IT WORK?
  • 5. USING CELERY, BEST PRACTICES AND SCALING.
  • 8. OUT OF THE REQUEST/RESPONSE CYCLE. Example: Sending emails asynchronously.
  • 9. TASKS IN THE BACKGROUND. Example: Computationally heavy jobs. Example: Interacting with external APIs.
  • 11. HISTORY Python. Released (0.1) in 2009. Currently on 3.1, with 3.2 in alpha. Developed by Ask Solem (@asksol)
  • 13. PRODUCER Produces a task for the queue.
  • 14. BROKER Stores the task backlog Answers, what work remains to be done? RabbitMQ, Redis, SQLAlchemy, Django's ORM, MongoDB...
  • 15. WORKER Executes and consumes tasks. Distributed.
  • 16. RESULTS BACKEND. Stores the results from our tasks. Redis, SQLAlchemy, Django's ORM, MongoDB... Optional!
  • 18. from celery import Celery app = Celery('tasks', backend='amqp', broker='amqp://guest@localhost//') @app.task def add(x, y): return x + y
  • 19. >>> result = add.delay(4, 4) >>> result.state 'SUCCESS' >>> result.id '4cc7438e-afd4-4f8f-a2f3-f46567e7ca77' >>> result.get() 8 http://celery.readthedocs.org/en/latest/reference/celery.result.html
  • 20. PICK YOUR FLAVOR. @app.task def add(x, y): return x + y add(2, 4) class AddTask(app.Task): def run(self, x, y): return x + y AddTask().run(2, 4)
  • 21. # Async add.delay(2, 4) add.apply_async(args=(2, 4), expires=30) # Eager! result = add.apply(args=(2, 4)) # Executes locally. # Or... add(2, 4) # Does not return a celery result!
  • 24. http://docs.celeryproject.org/en/master/django/first-steps-with-django.html - project/ - config/__init__.py - config/settings.py - config/urls.py - manage.py
  • 25. # project/config/celery.py from __future__ import absolute_import import os from celery import Celery from django.conf import settings # Set the default Django settings module for the 'celery' program. os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings') app = Celery('app') # Using a string here means the worker will not have to # pickle the object when using Windows. app.config_from_object('django.conf:settings') app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) @app.task(bind=True) def debug_task(self): print('Request: {0!r}'.format(self.request))
  • 26. # project/config/__init__.py from __future__ import absolute_import # This will make sure the app is always imported when # Django starts so that shared_task will use this app. from .celery import app as celery_app __all__ = ['celery_app']
  • 27. celery -A project worker -l info
  • 28. TESTING # settings.py import sys if 'test' in sys.argv: CELERY_EAGER_PROPAGATES_EXCEPTIONS=True CELERY_ALWAYS_EAGER=True BROKER_BACKEND='memory'
  • 29. PATTERNS AND BEST PRACTICES.
  • 30. NEVER PASS OBJECTS AS ARGUMENTS.
  • 31. # Bad @app.task() def send_reminder(reminder): reminder.send_email() # Good @app.task() def send_reminder(pk): try: reminder = Reminder.objects.get(pk=pk) except Reminder.DoesNotExist: return reminder.send_email()
  • 32. KEEP TASKS GRANULAR. CAN PROCESS MORE IN PARALLEL.
  • 34. # Bad @app.task def update_page_info(url): page = fetch_page.delay(url).get() info = parse_page.delay(url, page).get() store_page_info.delay(url, info) @app.task def fetch_page(url): return myhttplib.get(url) @app.task def parse_page(url, page): return myparser.parse_document(page) @app.task def store_page_info(url, info): return PageInfo.objects.create(url, info)
  • 35. # Good def update_page_info(url): chain = fetch_page.s() | parse_page.s() | store_page_info.s(url) chain() @app.task() def fetch_page(url): return myhttplib.get(url) @app.task() def parse_page(page): return myparser.parse_document(page) @app.task(ignore_result=True) def store_page_info(info, url): PageInfo.objects.create(url=url, info=info) http://celery.readthedocs.org/en/latest/userguide/canvas.html
  • 37. from datetime import timedelta @app.periodic_task(run_every=timedelta(minutes=5)) def run_every_five(): pass
  • 38. from datetime import timedelta class RunEveryFive(app.PeriodicTask): run_every = timedelta(minutes=5) def run(self): pass
  • 39. from datetime import timedelta @app.task() def run_every_five(): pass CELERYBEAT_SCHEDULE = { 'run-every-five': { 'task': 'tasks.run_every_five', 'schedule': timedelta(minutes=5) }, }
  • 40. CRON STYLE. from celery.schedules import crontab crontab(minute=0, hour='*/3') # Every 3 hours. crontab(day_of_week='sunday') # Every minute on Sundays. crontab(0, 0, month_of_year='*/3') # First month of every quarter.
  • 41. @app.periodic_task(run_every=crontab(minute=0, hour=1)) def schedule_emails(): user_ids = User.objects.values_list('id', flat=True) for user_id in user_ids: send_daily_email.delay(user_id) @app.task() def send_daily_email(user_id): user = User.objects.get(id=user_id) try: today = datetime.now() Email.objects.get( user=user, date__year=today.year, date__month=today.month, date__day=today.day) except Email.DoesNotExist: email = Email(user=user, body="Hey, don't forget to LOGIN PLEASE!") email.send() email.save()
  • 42. CELERY BEAT A.K.A THE SCHEDULER. celery -A project beat
  • 43. NEVER RUN A BEAT + WORKER ON A SINGLE CELERY PROCESS. # Really bad idea.... celery -A project worker -B
  • 44. FREQUENTLY RUNNING PERIODIC TASKS. BEWARE OF "TASK STACKING"
  • 45. Schedule task runs every 5 minutes. Tasks take 30 minutes. Schedule task stacks. Bad stuff.
  • 46. EXPIRES! from time import sleep @app.periodic_task(expires=5*60, run_every=timedelta(minutes=5)) def schedule_task(): for _ in range(30): one_minute_task.delay() @app.task(expires=5*60) def one_minute_task(): sleep(60)
  • 47. THINGS GO WRONG IN TASKS!
  • 48. RETRY
  • 49. from celery.exceptions import Retry @app.task(max_retries=10) def gather_data(): try: data = api.get_data() # etc, etc, ... except api.RateLimited as e: raise Retry(exc=e, when=e.cooldown) except api.IsDown: return
  • 53. class DebugTask(app.Task): def after_return(self, status, retval, task_id, args, kwargs, einfo): print("I'm done!") def on_failure(self, exc, task_id, args, kwargs, einfo): print("I failed :(") def on_retry(self, exc, task_id, args, kwargs, einfo): print("I'll try again!") def on_success(self, retval, task_id, args, kwargs): print("I did it!")
  • 54. ABSTRACT class AbstractTask(app.Task): abstract = True def after_return(self, *args, **kwargs): print("All done!") @app.task(base=AbstractTask) def add(x, y): return x + y
  • 55. INSTANTIATION class DatabaseTask(app.Task): abstract = True _db = None @property def db(self): if self._db is None: self._db = Database.connect() return self._db
  • 56. ENSURE A TASK IS EXECUTED ONE AT A TIME
  • 57. from celery import task from celery.utils.log import get_task_logger from django.core.cache import cache from django.utils.hashcompat import md5_constructor as md5 from djangofeeds.models import Feed logger = get_task_logger(__name__) LOCK_EXPIRE = 60 * 5 # Lock expires in 5 minutes @task(bind=True) def import_feed(self, feed_url): # The cache key consists of the task name and the MD5 digest # of the feed URL. feed_url_hexdigest = md5(feed_url).hexdigest() lock_id = '{0}-lock-{1}'.format(self.name, feed_url_hexdigest) # cache.add fails if the key already exists acquire_lock = lambda: cache.add(lock_id, 'true', LOCK_EXPIRE) # memcache delete is very slow, but we have to use it to take # advantage of using add() for atomic locking release_lock = lambda: cache.delete(lock_id) logger.debug('Importing feed: %s', feed_url) if acquire_lock(): try: feed = Feed.objects.import_feed(feed_url) finally: release_lock() return feed.url logger.debug( 'Feed %s is already being imported by another worker', feed_url)
  • 59. # settings.py CELERY_IGNORE_RESULT = True CELERYD_TASK_SOFT_TIME_LIMIT = 500 CELERYD_TASK_TIME_LIMIT = 1000
  • 60. # tasks.py @app.task(ignore_result=True, soft_time_limit=60, time_limit=120) def add(x, y): pass
  • 61. # settings.py CELERYD_MAX_TASKS_PER_CHILD = 500 CELERYD_PREFETCH_MULTIPLIER = 4
  • 63. SO MANY CHOICES! RabbitMQ Redis SQLAlchemy Django's ORM MongoDB Amazon SQS CouchDB Beanstalk IronMQ
  • 64. DJANGO ORM. # settings.py BROKER_URL = 'django://' INSTALLED_APPS = ( 'kombu.transport.django', ) CELERY_RESULT_BACKEND='djcelery.backends.database:DatabaseBackend' python manage.py syncdb
  • 65. DON'T DO THIS FOR ANYTHING SERIOUS.
  • 67. C OPTIMIZED LIBRARY $ pip install librabbitmq
  • 69. CONCURRENCY celery -A project worker -c 10 celery -A project worker --autoscale=10,1
  • 70. INCREASED CONCURRENCY CAN QUICKLY DRAIN CONNECTIONS ON YOUR DATABASE Use a connection pooler (pgbouncer).
  • 72. CELERY_ROUTES = { 'email.tasks.send_mail': { 'queue': 'priority', }, } # or send_mail.apply_async(queue="priority") celery -A project worker -Q priority
  • 76. Make tasks faster. Reduce volume of tasks.
  • 78.
  • 81. RABBITMQ MANAGEMENT PLUGIN HAS A GREAT HTTP API!