# Source code for dragontide._grammars

from collections import Counter

from dragonfly import Grammar

from dragontide._specparsers import _XmlSpecParser
from dragontide._support import _first_not_none, _safe_kwargs


class Registry(object):
    """
    A registry maintains information about a set of known active rules and
    the `literal tags <literalization>` that must precede their
    `intros <intros>` when their commands are meant as free speech dictation.
    Working directly with a Registry object is an advanced use case.

    A registry exposes services regarding inspection and parsing of
    utterances as it relates to its literal tags and currently actively
    registered commands.
    """

    literal_tags = ["English", "english", "literal"]
    """
    `Literal tags <literalization>` are used during speech to indicate that
    what follows is not a command. Registry objects initialize with these
    default values.
    """

    def __init__(self, literal_tags=None, override_tags=False):
        """
        :param literal_tags: These words will function as
            `literalization <literalization>` markers to indicate that what
            follows is not a command, but rather free speech dictation.
            ``None`` (the default) means "no additional tags".
        :type literal_tags: string list
        :param bool override_tags: If False, the literal_tags supplied to
            __init__ will be added to the defaults, otherwise they will
            replace them.
        """
        # Always work on a private copy: extending the argument in place
        # would mutate the caller's list (or a shared default), so every new
        # Registry would accumulate another copy of the default tags.
        tags = list(literal_tags) if literal_tags is not None else []
        if not override_tags:
            tags.extend(Registry.literal_tags)
        self.literal_tags = tags
        # intro -> number of currently active rules providing that intro
        self._registered_commands = Counter()
        # leading word sequence of an intro -> number of intros it starts
        self._command_partials = Counter()

    def translate_literals(self, words_iterable):
        """
        Returns a list of words, stripped of
        :term:`literal tags <literal tag>` in a semantically meaningful way.

        Final isolated literal_tag's are stripped. When a literal_tag
        precedes a literal_tag, the second occurrence only is retained. In a
        string of all literal_tag's, exactly the odd indexed ones (in a
        0-indexed sense) would be returned.
        """
        translation = []
        words_iterator = iter(words_iterable)
        for word in words_iterator:
            if word in self.literal_tags:
                # The tag itself is dropped; whatever follows it is kept
                # verbatim, even if it is itself a literal tag.
                try:
                    word = next(words_iterator)
                except StopIteration:
                    # trailing tag with nothing to protect: drop it
                    break
            translation.append(word)
        return translation

    def _get_literal_tag_indices(self, words_iterable):
        """
        Returns a list of indices where literal tags occur for the purpose
        of being literal tags.

        The word following such a tag is skipped, so a tag that is itself
        being literalized is not reported. A trailing tag is reported.
        """
        indices = []
        words_iterator = enumerate(words_iterable)
        for i, word in words_iterator:
            if word in self.literal_tags:
                indices.append(i)
                # Skip the next (i, word) pair. The default keeps a trailing
                # tag from leaking StopIteration out of this method.
                next(words_iterator, None)
        return indices

    def register_rule(self, rule):
        """
        Adds the rule to a list of known active rules. Not generally called
        directly by users. For more information see the
        `registration <registration>` concept section.
        """
        intros = self._get_intros(rule)
        partials = self._get_partials(rule, intros)
        self._registered_commands.update(intros)
        self._command_partials.update(partials)

    def unregister_rule(self, rule):
        """
        Removes the rule from the list of known active rules. Not generally
        called directly by users.
        """
        intros = self._get_intros(rule)
        partials = self._get_partials(rule, intros)
        self._registered_commands.subtract(intros)
        self._command_partials.subtract(partials)

    def is_registered(self, intro):
        """
        :param string intro: A command :term:`intro` to test for
            `registration <registration>`.
        :returns: True if registered, False otherwise
        :rtype: bool
        """
        return self._registered_commands[intro] > 0

    def has_partial(self, partial_command):
        """
        Returns True if the string supplied is an initial substring of a
        registered intro, assuming only full words are supplied.
        """
        return self._command_partials[partial_command] > 0

    def starts_with_registered(self, words_iterable):
        """
        Returns True if the iterable of strings begins with the words of a
        registered command, False otherwise (including when the words run
        out while only a partial intro has been matched).

        A literal tag and the word following it are skipped and do not
        contribute to the match.
        """
        running_match = ""
        words_iterator = iter(words_iterable)
        for word in words_iterator:
            if word in self.literal_tags:
                # Drop the tag and the word it protects; a trailing tag has
                # nothing after it, hence the default.
                next(words_iterator, None)
                continue
            if running_match:
                running_match += " "
            running_match += word
            if self.is_registered(running_match):
                return True
            elif not self.has_partial(running_match):
                return False
        # Ran out of words without completing any registered intro.
        return False

    def _determine_command_index(self, dictation_words):
        """
        Returns the index of the first word that starts a registered
        command, ``len(dictation_words)`` if there is none, or None when
        ``dictation_words`` is empty.
        """
        if not dictation_words:
            return None
        word_count = len(dictation_words)
        start_index = 0
        while start_index < word_count:
            if dictation_words[start_index] in self.literal_tags:
                # A literal tag protects the following word, so a command
                # cannot start at either position.
                start_index += 2
                continue
            words_iterable = (
                dictation_words[i] for i in range(start_index, word_count)
            )
            if self.starts_with_registered(words_iterable):
                return start_index
            start_index += 1
        return word_count

    def _split_dictation(self, dictation):
        """
        Splits ``dictation.words`` into ``(free speech, command)`` parts;
        see `_split_dictation_words_list`.
        """
        return self._split_dictation_words_list(dictation.words)

    def _split_dictation_words_list(self, dictation_words_list):
        """
        Returns ``(words before the first command, words from the first
        command onward)``, or ``(None, None)`` for empty input.
        """
        if not dictation_words_list:
            return None, None
        command_index = self._determine_command_index(dictation_words_list)
        if command_index is None:  # indicates an error
            return None, None
        return (
            dictation_words_list[:command_index],
            dictation_words_list[command_index:],
        )

    @staticmethod
    def _determine_intros(rule):
        """
        Expected to be able to accept any spec as long as it is well-formed:

        - balanced parentheses and brackets
        - contains no { or } characters
        - outside of <extra> references, contains no < or > characters

        This could be further enhanced to extract string parts from Option
        elements e.g.

            spec = "select <direction> word"
            extras = (Choice("direction", {"left":"left", "right":"right"}), )
            ### intros --> ["select right word", "select left word"]

        Returns ``rule._intros`` when it is set; otherwise the intros parsed
        from the rule's ``_intros_spec`` / ``_spec``, or None when there is
        no spec or it cannot be parsed.
        """
        if rule._intros:
            return rule._intros
        intros_spec = _first_not_none(
            getattr(rule, "_intros_spec", None), getattr(rule, "_spec", None)
        )
        if not intros_spec:
            return None
        return Registry._parse_spec(intros_spec)

    @staticmethod
    def _determine_partials(rule, intros=None):
        """
        Returns every proper leading word sequence of every intro, e.g.
        ``"open new file"`` contributes ``"open new"`` and ``"open"``.
        """
        partials = []
        intros = _first_not_none(intros, Registry._get_intros(rule))
        # intros may still be None when no spec could be determined for the
        # rule; treat that as "no intros" rather than failing on iteration.
        for intro in intros or ():
            position = intro.rfind(" ")
            while position != -1:  # -1 means down to final word, not a partial
                partials.append(intro[0:position])
                position = intro.rfind(" ", 0, position)
        return partials

    @staticmethod
    def _get_intros(rule):
        """
        Returns the rule's intros, determining and caching them on the rule
        on first use. Rules without a truthy ``_is_registered`` yield ``[]``.
        """
        if getattr(rule, "_is_registered", False):
            if not rule._determined_intros:
                rule._determined_intros = Registry._determine_intros(rule)
            return rule._determined_intros
        else:
            return []

    @staticmethod
    def _get_partials(rule, intros=None):
        """
        Returns the rule's partials, determining and caching them on the
        rule on first use. Rules without a truthy ``_is_registered`` yield
        ``[]``.
        """
        if getattr(rule, "_is_registered", False):
            if not rule._determined_partials:
                rule._determined_partials = Registry._determine_partials(
                    rule, intros
                )
            return rule._determined_partials
        else:
            return []

    @staticmethod
    def _parse_spec(spec):
        """
        Returns the intros parsed from ``spec``, or None (after printing a
        notice) when the spec cannot be parsed.
        """
        try:
            parser = _XmlSpecParser(spec)
            return parser.get_intros()
        except Exception:
            # Best effort: an unparsable spec must not prevent the rule from
            # loading. KeyboardInterrupt/SystemExit still propagate.
            print("Registry could not parse this spec for intros:", spec)
            return None
class RegistryGrammar(Grammar):
    """
    A RegistryGrammar is like a normal Grammar_ object, except it registers
    and unregisters `RegisteredRule`'s as they are activated and
    deactivated, maintaining a registry of those that are currently active.

    `ContinuingRule`'s that are added to this grammar will automatically use
    this object's registry when seeking out commands embedded in utterances.
    """

    def __init__(self, name, registry=None, **kwargs):
        r"""
        :param name: Passed to dragonfly Grammar_
        :param Registry registry: The Registry object that serves as the
            active `registration` list. It may be shared across
            RegistryGrammar instances. If None, a local Registry object is
            created.
        :param \*\*kwargs: Passed safely to dragonfly Grammar_
        """
        self.registry = _first_not_none(registry, Registry())
        _safe_kwargs(Grammar.__init__, self, name, **kwargs)

    # override -- you're not expected to need to know this is in place
    def activate_rule(self, rule):
        """
        Register the rule with this grammar's registry when it is flagged
        ``_is_registered``, then activate it through Grammar_.
        """
        wants_registration = getattr(rule, "_is_registered", False)
        if wants_registration:
            self.registry.register_rule(rule)
        Grammar.activate_rule(self, rule)

    # override -- you're not expected to need to know this is in place
    def deactivate_rule(self, rule):
        """
        Unregister the rule from this grammar's registry when it is flagged
        ``_is_registered``, then deactivate it through Grammar_.
        """
        wants_registration = getattr(rule, "_is_registered", False)
        if wants_registration:
            self.registry.unregister_rule(rule)
        Grammar.deactivate_rule(self, rule)

    # override -- you're not expected to need to know this is in place
    def unload(self):
        """
        Deactivate every rule in ``self._rules`` and then unload through
        Grammar_.
        """
        # unregister to prevent multiply registered rules during restart
        for loaded_rule in self._rules:
            loaded_rule.deactivate()
        Grammar.unload(self)
class GlobalRegistry(RegistryGrammar):
    """
    The GlobalRegistry is a `RegistryGrammar` with a single globally shared
    `Registry`. It can be used as the Grammar_ object across many files,
    allowing the rules to know about each other for chaining.
    """

    # Class-level, so every GlobalRegistry instance hands the same Registry
    # object to RegistryGrammar.__init__.
    registry = Registry()

    def __init__(self, name, description=None, context=None, engine=None,
                 **kwargs):
        r"""
        :param name: Passed to dragonfly Grammar_
        :param description: Passed to dragonfly Grammar_
        :param context: Passed to dragonfly Grammar_
        :param engine: Passed to dragonfly Grammar_
        :param \*\*kwargs: Passed to `RegistryGrammar`
        """
        # Fold the explicit Grammar arguments back into kwargs so they are
        # forwarded together with any extra keyword arguments.
        kwargs.update(description=description, context=context, engine=engine)
        RegistryGrammar.__init__(self, name, self.registry, **kwargs)