# Source code for dragontide._grammars

from collections import Counter

from dragonfly import Grammar

from dragontide._specparsers import _XmlSpecParser
from dragontide._support import _first_not_none, _safe_kwargs


class Registry(object):
    """
    A registry maintains information about a set of known active rules and the
    `literal tags <literalization>` that must precede their `intros <intros>`
    when their commands are meant as free speech dictation.

    Working directly with a Registry object is an advanced use case.

    A registry exposes services regarding inspection and parsing of utterances
    as it relates to its literal tags and currently actively registered
    commands.
    """

    literal_tags = ["English", "english", "literal"]
    """
    `Literal tags <literalization>` are used during speech to indicate that what
    follows is not a command. Registry objects initialize with these default
    values.
    """

    def __init__(self, literal_tags=None, override_tags=False):
        """
        :param literal_tags: These words will function as `literalization
            <literalization>` markers to indicate that what
            follows is not a command, but rather free speech dictation.
            ``None`` (the default) means no additional tags.
        :type literal_tags: string list
        :param bool override_tags: If False, the literal_tags supplied to
            __init__ will be added to the defaults, otherwise they will
            replace them.
        """
        # Always work on a private copy: extending the caller's list (or a
        # shared default list) in place would leak tags between instances.
        tags = [] if literal_tags is None else list(literal_tags)
        if not override_tags:
            tags.extend(Registry.literal_tags)
        self.literal_tags = tags
        # Counters are used so that the same intro/partial contributed by
        # several rules stays known until every one of them is unregistered.
        self._registered_commands = Counter()
        self._command_partials = Counter()

    def translate_literals(self, words_iterable):
        """
        Returns a list of words, stripped of :term:`literal tags <literal tag>`
        in a semantically meaningful way. Final isolated literal_tag's are
        stripped.

        When a literal_tag precedes a literal_tag, the second occurrence only
        is retained.

        In a string of all literal_tag's, exactly the odd indexed ones
        (in a 0-indexed sense) would be returned.
        """
        translation = []
        words_iterator = iter(words_iterable)

        for word in words_iterator:
            if word in self.literal_tags:
                # The tag itself is dropped; the word it protects is kept
                # verbatim, even when that word is itself a literal tag.
                try:
                    word = next(words_iterator)
                except StopIteration:
                    break  # trailing tag with nothing after it
            translation.append(word)

        return translation

    def _get_literal_tag_indices(self, words_iterable):
        """
        Returns a list of indices where literal tags occur for the purpose of
        being literal tags.
        """
        indices = []
        words_iterator = enumerate(words_iterable)

        for i, word in words_iterator:
            if word in self.literal_tags:
                indices.append(i)
                # Skip the protected word; the default keeps a trailing tag
                # from raising StopIteration out of this method.
                next(words_iterator, None)

        return indices

    def register_rule(self, rule):
        """
        Adds the rule to a list of known active rules. Not generally called
        directly by users. For more information see
        the `registration <registration>` concept section.
        """
        intros = self._get_intros(rule)
        partials = self._get_partials(rule, intros)
        self._registered_commands.update(intros)
        self._command_partials.update(partials)

    def unregister_rule(self, rule):
        """
        Removes the rule from the list of known active rules. Not generally
        called directly by users.
        """
        intros = self._get_intros(rule)
        partials = self._get_partials(rule, intros)
        self._registered_commands.subtract(intros)
        self._command_partials.subtract(partials)

    def is_registered(self, intro):
        """
        :param string intro: A command :term:`intro` to test for
            `registration <registration>`.
        :returns: True if registered, False otherwise
        :rtype: bool
        """
        return self._registered_commands[intro] > 0

    def has_partial(self, partial_command):
        """
        Returns True if the string supplied is an initial substring of a
        registered intro, assuming only full words are supplied.
        """
        return self._command_partials[partial_command] > 0

    def starts_with_registered(self, words_iterable):
        """
        Returns True if the iterable of strings begins with the words of a
        registered command, False otherwise.

        A literal tag and the word that follows it are both skipped and do
        not take part in the match.
        """
        running_match = ""
        words_iterator = iter(words_iterable)
        for word in words_iterator:
            if word in self.literal_tags:
                # Skip the protected word too; tolerate a trailing tag.
                next(words_iterator, None)
                continue

            if running_match:
                running_match += " "
            running_match += word
            if self.is_registered(running_match):
                return True
            elif not self.has_partial(running_match):
                return False
        # Words ran out while only a partial (or nothing) had been matched.
        return False

    def _determine_command_index(self, dictation_words):
        """
        Returns the index of the first word at which a registered command
        begins, ``len(dictation_words)`` if there is none, or None when
        ``dictation_words`` is empty.
        """
        if not dictation_words:
            return None

        word_count = len(dictation_words)
        start_index = 0
        while start_index < word_count:
            if dictation_words[start_index] in self.literal_tags:
                # A tag and the word it protects can never start a command.
                start_index += 2
                continue
            if self.starts_with_registered(dictation_words[start_index:]):
                return start_index
            start_index += 1
        return word_count

    def _split_dictation(self, dictation):
        """Splits ``dictation.words``; see `_split_dictation_words_list`."""
        return self._split_dictation_words_list(dictation.words)

    def _split_dictation_words_list(self, dictation_words_list):
        """
        Returns a pair ``(words before the first command, words from the
        first command onward)``, or ``(None, None)`` when there are no words.
        """
        if not dictation_words_list:
            return None, None
        command_index = self._determine_command_index(dictation_words_list)
        if command_index is None:  # indicates an error
            return None, None
        return (
            dictation_words_list[:command_index],
            dictation_words_list[command_index:],
        )

    @staticmethod
    def _determine_intros(rule):
        """
        Expected to be able to accept any spec as long as it is well-formed:
        - balanced parentheses and brackets
        - contains no { or } characters
        - outside of <extra> references, contains no < or > characters

        This could be further enhanced to extract string parts from Option elements
        e.g.    spec = "select <direction> word"
                extras = (Choice("direction", {"left":"left", "right":"right"}), )
                ### intros --> ["select right word", "select left word"]
        """
        # Explicitly supplied intros win over anything parsed from a spec.
        explicit_intros = getattr(rule, "_intros", None)
        if explicit_intros:
            return explicit_intros

        intros_spec = _first_not_none(
            getattr(rule, "_intros_spec", None), getattr(rule, "_spec", None)
        )
        if not intros_spec:
            return None
        return Registry._parse_spec(intros_spec)

    @staticmethod
    def _determine_partials(rule, intros=None):
        """
        Returns every proper leading word-sequence of each intro, e.g.
        "a b c" contributes "a b" and "a". Single-word intros contribute
        nothing.
        """
        partials = []
        intros = _first_not_none(intros, Registry._get_intros(rule))
        if not intros:
            # No intros could be determined (e.g. an unparsable spec).
            return partials
        for intro in intros:
            position = intro.rfind(" ")
            while position != -1:  # -1 means down to final word, not a partial
                partials.append(intro[0:position])
                position = intro.rfind(" ", 0, position)
        return partials

    @staticmethod
    def _get_intros(rule):
        """
        Returns the rule's intros, computing and caching them on the rule as
        ``_determined_intros`` on first use. Rules without a truthy
        ``_is_registered`` attribute yield an empty list.
        """
        if getattr(rule, "_is_registered", False):
            if not rule._determined_intros:
                rule._determined_intros = Registry._determine_intros(rule)
            return rule._determined_intros
        else:
            return []

    @staticmethod
    def _get_partials(rule, intros=None):
        """
        Returns the rule's partials, computing and caching them on the rule
        as ``_determined_partials`` on first use. Rules without a truthy
        ``_is_registered`` attribute yield an empty list.
        """
        if getattr(rule, "_is_registered", False):
            if not rule._determined_partials:
                rule._determined_partials = Registry._determine_partials(rule, intros)
            return rule._determined_partials
        else:
            return []

    @staticmethod
    def _parse_spec(spec):
        """
        Returns the intros `_XmlSpecParser` extracts from ``spec``, or None
        (after printing a notice) if parsing fails.
        """
        try:
            parser = _XmlSpecParser(spec)
            return parser.get_intros()
        except Exception:
            # Best effort: an unparsable spec must not break registration,
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            print("Registry could not parse this spec for intros:", spec)
            return None


class RegistryGrammar(Grammar):
    """
    A RegistryGrammar is like a normal Grammar_ object, except it registers
    and unregisters `RegisteredRule`'s as they are activated and deactivated,
    maintaining a registry of those that are currently active.

    `ContinuingRule`'s that are added to this grammar will automatically use
    this object's registry when seeking out commands embedded in utterances.
    """

    def __init__(self, name, registry=None, **kwargs):
        r"""
        :param name: Passed to dragonfly Grammar_
        :param Registry registry: The Registry object that serves as the
            active `registration` list. It may be shared across
            RegistryGrammar instances. If None, a local Registry object is
            created.
        :param \*\*kwargs: Passed safely to dragonfly Grammar_
        """
        # The registry is set before Grammar.__init__ runs.
        self.registry = _first_not_none(registry, Registry())
        # NOTE(review): _safe_kwargs presumably drops keyword arguments that
        # Grammar.__init__ does not accept -- confirm in dragontide._support.
        _safe_kwargs(Grammar.__init__, self, name, **kwargs)

    # override -- you're not expected to need to know this is in place
    def activate_rule(self, rule):
        """Register the rule (if it is a registered rule), then activate it."""
        if getattr(rule, "_is_registered", False):
            self.registry.register_rule(rule)
        Grammar.activate_rule(self, rule)

    # override -- you're not expected to need to know this is in place
    def deactivate_rule(self, rule):
        """Unregister the rule (if it is a registered rule), then deactivate it."""
        if getattr(rule, "_is_registered", False):
            self.registry.unregister_rule(rule)
        Grammar.deactivate_rule(self, rule)

    # override -- you're not expected to need to know this is in place
    def unload(self):
        """Deactivate every rule of this grammar, then unload the grammar."""
        for rule in self._rules:
            # unregister to prevent multiply registered rules during restart
            rule.deactivate()
        Grammar.unload(self)


class GlobalRegistry(RegistryGrammar):
    """
    The GlobalRegistry is a `RegistryGrammar` with a single globally shared
    `Registry`. It can be used as the Grammar_ object across many files,
    allowing the rules to know about each other for chaining.
    """

    # One Registry created at class-definition time and handed to every
    # GlobalRegistry instance, which is what makes it shared.
    registry = Registry()

    def __init__(self, name, description=None, context=None, engine=None, **kwargs):
        r"""
        :param name: Passed to dragonfly Grammar_
        :param description: Passed to dragonfly Grammar_
        :param context: Passed to dragonfly Grammar_
        :param engine: Passed to dragonfly Grammar_
        :param \*\*kwargs: Passed to `RegistryGrammar`
        """
        # description/context/engine are named parameters, so they can never
        # also appear in **kwargs; forwarding them as keywords is unambiguous.
        RegistryGrammar.__init__(
            self,
            name,
            self.registry,
            description=description,
            context=context,
            engine=engine,
            **kwargs
        )