AlkantarClanX12

Your IP : 18.225.175.230


Current Path : /opt/hc_python/lib/python3.8/site-packages/sqlalchemy/ext/
Upload File :
Current File : //opt/hc_python/lib/python3.8/site-packages/sqlalchemy/ext/horizontal_shard.py

# ext/horizontal_shard.py
# Copyright (C) 2005-2024 the SQLAlchemy authors and contributors
# <see AUTHORS file>
#
# This module is part of SQLAlchemy and is released under
# the MIT License: https://www.opensource.org/licenses/mit-license.php

"""Horizontal sharding support.

Defines a rudimental 'horizontal sharding' system which allows a Session to
distribute queries and persistence operations across multiple databases.

For a usage example, see the :ref:`examples_sharding` example included in
the source distribution.

.. deepalchemy:: The horizontal sharding extension is an advanced feature,
   involving a complex statement -> database interaction as well as
   use of semi-public APIs for non-trivial cases.   Simpler approaches to
   refering to multiple database "shards", most commonly using a distinct
   :class:`_orm.Session` per "shard", should always be considered first
   before using this more complex and less-production-tested system.



"""
from __future__ import annotations

from typing import Any
from typing import Callable
from typing import Dict
from typing import Iterable
from typing import Optional
from typing import Tuple
from typing import Type
from typing import TYPE_CHECKING
from typing import TypeVar
from typing import Union

from .. import event
from .. import exc
from .. import inspect
from .. import util
from ..orm import PassiveFlag
from ..orm._typing import OrmExecuteOptionsParameter
from ..orm.interfaces import ORMOption
from ..orm.mapper import Mapper
from ..orm.query import Query
from ..orm.session import _BindArguments
from ..orm.session import _PKIdentityArgument
from ..orm.session import Session
from ..util.typing import Protocol
from ..util.typing import Self

if TYPE_CHECKING:
    from ..engine.base import Connection
    from ..engine.base import Engine
    from ..engine.base import OptionEngine
    from ..engine.result import IteratorResult
    from ..engine.result import Result
    from ..orm import LoaderCallableStatus
    from ..orm._typing import _O
    from ..orm.bulk_persistence import BulkUDCompileState
    from ..orm.context import QueryContext
    from ..orm.session import _EntityBindKey
    from ..orm.session import _SessionBind
    from ..orm.session import ORMExecuteState
    from ..orm.state import InstanceState
    from ..sql import Executable
    from ..sql._typing import _TP
    from ..sql.elements import ClauseElement

__all__ = ["ShardedSession", "ShardedQuery"]

_T = TypeVar("_T", bound=Any)


ShardIdentifier = str


class ShardChooser(Protocol):
    def __call__(
        self,
        mapper: Optional[Mapper[_T]],
        instance: Any,
        clause: Optional[ClauseElement],
    ) -> Any: ...


class IdentityChooser(Protocol):
    def __call__(
        self,
        mapper: Mapper[_T],
        primary_key: _PKIdentityArgument,
        *,
        lazy_loaded_from: Optional[InstanceState[Any]],
        execution_options: OrmExecuteOptionsParameter,
        bind_arguments: _BindArguments,
        **kw: Any,
    ) -> Any: ...


class ShardedQuery(Query[_T]):
    """Query class used with :class:`.ShardedSession`.

    .. legacy:: The :class:`.ShardedQuery` is a subclass of the legacy
       :class:`.Query` class.   The :class:`.ShardedSession` now supports
       2.0 style execution via the :meth:`.ShardedSession.execute` method.

    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        assert isinstance(self.session, ShardedSession)

        self.identity_chooser = self.session.identity_chooser
        self.execute_chooser = self.session.execute_chooser
        self._shard_id = None

    def set_shard(self, shard_id: ShardIdentifier) -> Self:
        """Return a new query, limited to a single shard ID.

        All subsequent operations with the returned query will
        be against the single shard regardless of other state.

        The shard_id can be passed for a 2.0 style execution to the
        bind_arguments dictionary of :meth:`.Session.execute`::

            results = session.execute(
                stmt,
                bind_arguments={"shard_id": "my_shard"}
            )

        """
        return self.execution_options(_sa_shard_id=shard_id)


class ShardedSession(Session):
    shard_chooser: ShardChooser
    identity_chooser: IdentityChooser
    execute_chooser: Callable[[ORMExecuteState], Iterable[Any]]

    def __init__(
        self,
        shard_chooser: ShardChooser,
        identity_chooser: Optional[IdentityChooser] = None,
        execute_chooser: Optional[
            Callable[[ORMExecuteState], Iterable[Any]]
        ] = None,
        shards: Optional[Dict[str, Any]] = None,
        query_cls: Type[Query[_T]] = ShardedQuery,
        *,
        id_chooser: Optional[
            Callable[[Query[_T], Iterable[_T]], Iterable[Any]]
        ] = None,
        query_chooser: Optional[Callable[[Executable], Iterable[Any]]] = None,
        **kwargs: Any,
    ) -> None:
        """Construct a ShardedSession.

        :param shard_chooser: A callable which, passed a Mapper, a mapped
          instance, and possibly a SQL clause, returns a shard ID.  This id
          may be based off of the attributes present within the object, or on
          some round-robin scheme. If the scheme is based on a selection, it
          should set whatever state on the instance to mark it in the future as
          participating in that shard.

        :param identity_chooser: A callable, passed a Mapper and primary key
         argument, which should return a list of shard ids where this
         primary key might reside.

          .. versionchanged:: 2.0  The ``identity_chooser`` parameter
             supersedes the ``id_chooser`` parameter.

        :param execute_chooser: For a given :class:`.ORMExecuteState`,
          returns the list of shard_ids
          where the query should be issued.  Results from all shards returned
          will be combined together into a single listing.

          .. versionchanged:: 1.4  The ``execute_chooser`` parameter
             supersedes the ``query_chooser`` parameter.

        :param shards: A dictionary of string shard names
          to :class:`~sqlalchemy.engine.Engine` objects.

        """
        super().__init__(query_cls=query_cls, **kwargs)

        event.listen(
            self, "do_orm_execute", execute_and_instances, retval=True
        )
        self.shard_chooser = shard_chooser

        if id_chooser:
            _id_chooser = id_chooser
            util.warn_deprecated(
                "The ``id_chooser`` parameter is deprecated; "
                "please use ``identity_chooser``.",
                "2.0",
            )

            def _legacy_identity_chooser(
                mapper: Mapper[_T],
                primary_key: _PKIdentityArgument,
                *,
                lazy_loaded_from: Optional[InstanceState[Any]],
                execution_options: OrmExecuteOptionsParameter,
                bind_arguments: _BindArguments,
                **kw: Any,
            ) -> Any:
                q = self.query(mapper)
                if lazy_loaded_from:
                    q = q._set_lazyload_from(lazy_loaded_from)
                return _id_chooser(q, primary_key)

            self.identity_chooser = _legacy_identity_chooser
        elif identity_chooser:
            self.identity_chooser = identity_chooser
        else:
            raise exc.ArgumentError(
                "identity_chooser or id_chooser is required"
            )

        if query_chooser:
            _query_chooser = query_chooser
            util.warn_deprecated(
                "The ``query_chooser`` parameter is deprecated; "
                "please use ``execute_chooser``.",
                "1.4",
            )
            if execute_chooser:
                raise exc.ArgumentError(
                    "Can't pass query_chooser and execute_chooser "
                    "at the same time."
                )

            def _default_execute_chooser(
                orm_context: ORMExecuteState,
            ) -> Iterable[Any]:
                return _query_chooser(orm_context.statement)

            if execute_chooser is None:
                execute_chooser = _default_execute_chooser

        if execute_chooser is None:
            raise exc.ArgumentError(
                "execute_chooser or query_chooser is required"
            )
        self.execute_chooser = execute_chooser
        self.__shards: Dict[ShardIdentifier, _SessionBind] = {}
        if shards is not None:
            for k in shards:
                self.bind_shard(k, shards[k])

    def _identity_lookup(
        self,
        mapper: Mapper[_O],
        primary_key_identity: Union[Any, Tuple[Any, ...]],
        identity_token: Optional[Any] = None,
        passive: PassiveFlag = PassiveFlag.PASSIVE_OFF,
        lazy_loaded_from: Optional[InstanceState[Any]] = None,
        execution_options: OrmExecuteOptionsParameter = util.EMPTY_DICT,
        bind_arguments: Optional[_BindArguments] = None,
        **kw: Any,
    ) -> Union[Optional[_O], LoaderCallableStatus]:
        """override the default :meth:`.Session._identity_lookup` method so
        that we search for a given non-token primary key identity across all
        possible identity tokens (e.g. shard ids).

        .. versionchanged:: 1.4  Moved :meth:`.Session._identity_lookup` from
           the :class:`_query.Query` object to the :class:`.Session`.

        """

        if identity_token is not None:
            obj = super()._identity_lookup(
                mapper,
                primary_key_identity,
                identity_token=identity_token,
                **kw,
            )

            return obj
        else:
            for shard_id in self.identity_chooser(
                mapper,
                primary_key_identity,
                lazy_loaded_from=lazy_loaded_from,
                execution_options=execution_options,
                bind_arguments=dict(bind_arguments) if bind_arguments else {},
            ):
                obj2 = super()._identity_lookup(
                    mapper,
                    primary_key_identity,
                    identity_token=shard_id,
                    lazy_loaded_from=lazy_loaded_from,
                    **kw,
                )
                if obj2 is not None:
                    return obj2

            return None

    def _choose_shard_and_assign(
        self,
        mapper: Optional[_EntityBindKey[_O]],
        instance: Any,
        **kw: Any,
    ) -> Any:
        if instance is not None:
            state = inspect(instance)
            if state.key:
                token = state.key[2]
                assert token is not None
                return token
            elif state.identity_token:
                return state.identity_token

        assert isinstance(mapper, Mapper)
        shard_id = self.shard_chooser(mapper, instance, **kw)
        if instance is not None:
            state.identity_token = shard_id
        return shard_id

    def connection_callable(  # type: ignore [override]
        self,
        mapper: Optional[Mapper[_T]] = None,
        instance: Optional[Any] = None,
        shard_id: Optional[ShardIdentifier] = None,
        **kw: Any,
    ) -> Connection:
        """Provide a :class:`_engine.Connection` to use in the unit of work
        flush process.

        """

        if shard_id is None:
            shard_id = self._choose_shard_and_assign(mapper, instance)

        if self.in_transaction():
            trans = self.get_transaction()
            assert trans is not None
            return trans.connection(mapper, shard_id=shard_id)
        else:
            bind = self.get_bind(
                mapper=mapper, shard_id=shard_id, instance=instance
            )

            if isinstance(bind, Engine):
                return bind.connect(**kw)
            else:
                assert isinstance(bind, Connection)
                return bind

    def get_bind(
        self,
        mapper: Optional[_EntityBindKey[_O]] = None,
        *,
        shard_id: Optional[ShardIdentifier] = None,
        instance: Optional[Any] = None,
        clause: Optional[ClauseElement] = None,
        **kw: Any,
    ) -> _SessionBind:
        if shard_id is None:
            shard_id = self._choose_shard_and_assign(
                mapper, instance=instance, clause=clause
            )
            assert shard_id is not None
        return self.__shards[shard_id]

    def bind_shard(
        self, shard_id: ShardIdentifier, bind: Union[Engine, OptionEngine]
    ) -> None:
        self.__shards[shard_id] = bind


class set_shard_id(ORMOption):
    """a loader option for statements to apply a specific shard id to the
    primary query as well as for additional relationship and column
    loaders.

    The :class:`_horizontal.set_shard_id` option may be applied using
    the :meth:`_sql.Executable.options` method of any executable statement::

        stmt = (
            select(MyObject).
            where(MyObject.name == 'some name').
            options(set_shard_id("shard1"))
        )

    Above, the statement when invoked will limit to the "shard1" shard
    identifier for the primary query as well as for all relationship and
    column loading strategies, including eager loaders such as
    :func:`_orm.selectinload`, deferred column loaders like :func:`_orm.defer`,
    and the lazy relationship loader :func:`_orm.lazyload`.

    In this way, the :class:`_horizontal.set_shard_id` option has much wider
    scope than using the "shard_id" argument within the
    :paramref:`_orm.Session.execute.bind_arguments` dictionary.


    .. versionadded:: 2.0.0

    """

    __slots__ = ("shard_id", "propagate_to_loaders")

    def __init__(
        self, shard_id: ShardIdentifier, propagate_to_loaders: bool = True
    ):
        """Construct a :class:`_horizontal.set_shard_id` option.

        :param shard_id: shard identifier
        :param propagate_to_loaders: if left at its default of ``True``, the
         shard option will take place for lazy loaders such as
         :func:`_orm.lazyload` and :func:`_orm.defer`; if False, the option
         will not be propagated to loaded objects. Note that :func:`_orm.defer`
         always limits to the shard_id of the parent row in any case, so the
         parameter only has a net effect on the behavior of the
         :func:`_orm.lazyload` strategy.

        """
        self.shard_id = shard_id
        self.propagate_to_loaders = propagate_to_loaders


def execute_and_instances(
    orm_context: ORMExecuteState,
) -> Union[Result[_T], IteratorResult[_TP]]:
    active_options: Union[
        None,
        QueryContext.default_load_options,
        Type[QueryContext.default_load_options],
        BulkUDCompileState.default_update_options,
        Type[BulkUDCompileState.default_update_options],
    ]

    if orm_context.is_select:
        active_options = orm_context.load_options

    elif orm_context.is_update or orm_context.is_delete:
        active_options = orm_context.update_delete_options
    else:
        active_options = None

    session = orm_context.session
    assert isinstance(session, ShardedSession)

    def iter_for_shard(
        shard_id: ShardIdentifier,
    ) -> Union[Result[_T], IteratorResult[_TP]]:
        bind_arguments = dict(orm_context.bind_arguments)
        bind_arguments["shard_id"] = shard_id

        orm_context.update_execution_options(identity_token=shard_id)
        return orm_context.invoke_statement(bind_arguments=bind_arguments)

    for orm_opt in orm_context._non_compile_orm_options:
        # TODO: if we had an ORMOption that gets applied at ORM statement
        # execution time, that would allow this to be more generalized.
        # for now just iterate and look for our options
        if isinstance(orm_opt, set_shard_id):
            shard_id = orm_opt.shard_id
            break
    else:
        if active_options and active_options._identity_token is not None:
            shard_id = active_options._identity_token
        elif "_sa_shard_id" in orm_context.execution_options:
            shard_id = orm_context.execution_options["_sa_shard_id"]
        elif "shard_id" in orm_context.bind_arguments:
            shard_id = orm_context.bind_arguments["shard_id"]
        else:
            shard_id = None

    if shard_id is not None:
        return iter_for_shard(shard_id)
    else:
        partial = []
        for shard_id in session.execute_chooser(orm_context):
            result_ = iter_for_shard(shard_id)
            partial.append(result_)
        return partial[0].merge(*partial[1:])