Skip to content

Commit

Permalink
gh-97696: asyncio eager tasks factory (#102853)
Browse files Browse the repository at this point in the history
Co-authored-by: Jacob Bower <[email protected]>
Co-authored-by: Carol Willing <[email protected]>
  • Loading branch information
3 people authored May 1, 2023
1 parent 59bc36a commit a474e04
Show file tree
Hide file tree
Showing 12 changed files with 945 additions and 47 deletions.
36 changes: 36 additions & 0 deletions Doc/library/asyncio-task.rst
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,42 @@ Running Tasks Concurrently
and there is no running event loop.


Eager Task Factory
==================

.. function:: eager_task_factory(loop, coro, *, name=None, context=None)

A task factory for eager task execution.

When using this factory (via :meth:`loop.set_task_factory(asyncio.eager_task_factory) <loop.set_task_factory>`),
coroutines begin execution synchronously during :class:`Task` construction.
Tasks are only scheduled on the event loop if they block.
This can be a performance improvement as the overhead of loop scheduling
is avoided for coroutines that complete synchronously.

A common example where this is beneficial is coroutines which employ
caching or memoization to avoid actual I/O when possible.

.. note::

Immediate execution of the coroutine is a semantic change.
If the coroutine returns or raises, the task is never scheduled
to the event loop. If the coroutine execution blocks, the task is
scheduled to the event loop. This change may introduce behavior
changes to existing applications. For example,
the application's task execution order is likely to change.

.. versionadded:: 3.12

.. function:: create_eager_task_factory(custom_task_constructor)

Create an eager task factory, similar to :func:`eager_task_factory`,
using the provided *custom_task_constructor* when creating a new task instead
of the default :class:`Task`.

.. versionadded:: 3.12


Shielding From Cancellation
===========================

Expand Down
5 changes: 5 additions & 0 deletions Doc/whatsnew/3.12.rst
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,11 @@ Optimizations
* Speed up :class:`asyncio.Task` creation by deferring expensive string formatting.
(Contributed by Itamar O in :gh:`103793`.)

* Added :func:`asyncio.eager_task_factory` and :func:`asyncio.create_eager_task_factory`
functions to allow opting an event loop in to eager task execution,
speeding up some use-cases by up to 50%.
(Contributed by Jacob Bower & Itamar O in :gh:`102853`)


CPython bytecode changes
========================
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_global_objects_fini_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Include/internal/pycore_global_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(dst_dir_fd)
STRUCT_FOR_ID(duration)
STRUCT_FOR_ID(e)
STRUCT_FOR_ID(eager_start)
STRUCT_FOR_ID(effective_ids)
STRUCT_FOR_ID(element_factory)
STRUCT_FOR_ID(encode)
Expand Down Expand Up @@ -460,6 +461,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(instructions)
STRUCT_FOR_ID(intern)
STRUCT_FOR_ID(intersection)
STRUCT_FOR_ID(is_running)
STRUCT_FOR_ID(isatty)
STRUCT_FOR_ID(isinstance)
STRUCT_FOR_ID(isoformat)
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_runtime_init_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions Include/internal/pycore_unicodeobject_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 6 additions & 4 deletions Lib/asyncio/base_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@ def _task_repr_info(task):

info.insert(1, 'name=%r' % task.get_name())

coro = coroutines._format_coroutine(task._coro)
info.insert(2, f'coro=<{coro}>')

if task._fut_waiter is not None:
info.insert(3, f'wait_for={task._fut_waiter!r}')
info.insert(2, f'wait_for={task._fut_waiter!r}')

if task._coro:
coro = coroutines._format_coroutine(task._coro)
info.insert(2, f'coro=<{coro}>')

return info


Expand Down
122 changes: 100 additions & 22 deletions Lib/asyncio/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
'wait', 'wait_for', 'as_completed', 'sleep',
'gather', 'shield', 'ensure_future', 'run_coroutine_threadsafe',
'current_task', 'all_tasks',
'create_eager_task_factory', 'eager_task_factory',
'_register_task', '_unregister_task', '_enter_task', '_leave_task',
)

Expand Down Expand Up @@ -43,22 +44,26 @@ def all_tasks(loop=None):
"""Return a set of all tasks for the loop."""
if loop is None:
loop = events.get_running_loop()
# Looping over a WeakSet (_all_tasks) isn't safe as it can be updated from another
# thread while we do so. Therefore we cast it to list prior to filtering. The list
# cast itself requires iteration, so we repeat it several times ignoring
# RuntimeErrors (which are not very likely to occur). See issues 34970 and 36607 for
# details.
# capturing the set of eager tasks first, so if an eager task "graduates"
# to a regular task in another thread, we don't risk missing it.
eager_tasks = list(_eager_tasks)
# Looping over the WeakSet isn't safe as it can be updated from another
# thread, therefore we cast it to list prior to filtering. The list cast
# itself requires iteration, so we repeat it several times ignoring
# RuntimeErrors (which are not very likely to occur).
# See issues 34970 and 36607 for details.
scheduled_tasks = None
i = 0
while True:
try:
tasks = list(_all_tasks)
scheduled_tasks = list(_scheduled_tasks)
except RuntimeError:
i += 1
if i >= 1000:
raise
else:
break
return {t for t in tasks
return {t for t in itertools.chain(scheduled_tasks, eager_tasks)
if futures._get_loop(t) is loop and not t.done()}


Expand Down Expand Up @@ -93,7 +98,8 @@ class Task(futures._PyFuture): # Inherit Python Task implementation
# status is still pending
_log_destroy_pending = True

def __init__(self, coro, *, loop=None, name=None, context=None):
def __init__(self, coro, *, loop=None, name=None, context=None,
eager_start=False):
super().__init__(loop=loop)
if self._source_traceback:
del self._source_traceback[-1]
Expand All @@ -117,8 +123,11 @@ def __init__(self, coro, *, loop=None, name=None, context=None):
else:
self._context = context

self._loop.call_soon(self.__step, context=self._context)
_register_task(self)
if eager_start and self._loop.is_running():
self.__eager_start()
else:
self._loop.call_soon(self.__step, context=self._context)
_register_task(self)

def __del__(self):
if self._state == futures._PENDING and self._log_destroy_pending:
Expand Down Expand Up @@ -250,6 +259,25 @@ def uncancel(self):
self._num_cancels_requested -= 1
return self._num_cancels_requested

def __eager_start(self):
prev_task = _swap_current_task(self._loop, self)
try:
_register_eager_task(self)
try:
self._context.run(self.__step_run_and_handle_result, None)
finally:
_unregister_eager_task(self)
finally:
try:
curtask = _swap_current_task(self._loop, prev_task)
assert curtask is self
finally:
if self.done():
self._coro = None
self = None # Needed to break cycles when an exception occurs.
else:
_register_task(self)

def __step(self, exc=None):
if self.done():
raise exceptions.InvalidStateError(
Expand All @@ -258,11 +286,17 @@ def __step(self, exc=None):
if not isinstance(exc, exceptions.CancelledError):
exc = self._make_cancelled_error()
self._must_cancel = False
coro = self._coro
self._fut_waiter = None

_enter_task(self._loop, self)
# Call either coro.throw(exc) or coro.send(None).
try:
self.__step_run_and_handle_result(exc)
finally:
_leave_task(self._loop, self)
self = None # Needed to break cycles when an exception occurs.

def __step_run_and_handle_result(self, exc):
coro = self._coro
try:
if exc is None:
# We use the `send` method directly, because coroutines
Expand Down Expand Up @@ -334,7 +368,6 @@ def __step(self, exc=None):
self._loop.call_soon(
self.__step, new_exc, context=self._context)
finally:
_leave_task(self._loop, self)
self = None # Needed to break cycles when an exception occurs.

def __wakeup(self, future):
Expand Down Expand Up @@ -897,17 +930,41 @@ def callback():
return future


# WeakSet containing all alive tasks.
_all_tasks = weakref.WeakSet()
def create_eager_task_factory(custom_task_constructor):

if "eager_start" not in inspect.signature(custom_task_constructor).parameters:
raise TypeError(
"Provided constructor does not support eager task execution")

def factory(loop, coro, *, name=None, context=None):
return custom_task_constructor(
coro, loop=loop, name=name, context=context, eager_start=True)


return factory

eager_task_factory = create_eager_task_factory(Task)


# Collectively these two sets hold references to the complete set of active
# tasks. Eagerly executed tasks use a faster regular set as an optimization
# but may graduate to a WeakSet if the task blocks on IO.
_scheduled_tasks = weakref.WeakSet()
_eager_tasks = set()

# Dictionary containing tasks that are currently active in
# all running event loops. {EventLoop: Task}
_current_tasks = {}


def _register_task(task):
"""Register a new task in asyncio as executed by loop."""
_all_tasks.add(task)
"""Register an asyncio Task scheduled to run on an event loop."""
_scheduled_tasks.add(task)


def _register_eager_task(task):
"""Register an asyncio Task about to be eagerly executed."""
_eager_tasks.add(task)


def _enter_task(loop, task):
Expand All @@ -926,28 +983,49 @@ def _leave_task(loop, task):
del _current_tasks[loop]


def _swap_current_task(loop, task):
prev_task = _current_tasks.get(loop)
if task is None:
del _current_tasks[loop]
else:
_current_tasks[loop] = task
return prev_task


def _unregister_task(task):
"""Unregister a task."""
_all_tasks.discard(task)
"""Unregister a completed, scheduled Task."""
_scheduled_tasks.discard(task)


def _unregister_eager_task(task):
"""Unregister a task which finished its first eager step."""
_eager_tasks.discard(task)


_py_current_task = current_task
_py_register_task = _register_task
_py_register_eager_task = _register_eager_task
_py_unregister_task = _unregister_task
_py_unregister_eager_task = _unregister_eager_task
_py_enter_task = _enter_task
_py_leave_task = _leave_task
_py_swap_current_task = _swap_current_task


try:
from _asyncio import (_register_task, _unregister_task,
_enter_task, _leave_task,
_all_tasks, _current_tasks,
from _asyncio import (_register_task, _register_eager_task,
_unregister_task, _unregister_eager_task,
_enter_task, _leave_task, _swap_current_task,
_scheduled_tasks, _eager_tasks, _current_tasks,
current_task)
except ImportError:
pass
else:
_c_current_task = current_task
_c_register_task = _register_task
_c_register_eager_task = _register_eager_task
_c_unregister_task = _unregister_task
_c_unregister_eager_task = _unregister_eager_task
_c_enter_task = _enter_task
_c_leave_task = _leave_task
_c_swap_current_task = _swap_current_task
Loading

0 comments on commit a474e04

Please sign in to comment.