#
#
-# Copyright (C) 2010 Google Inc.
+# Copyright (C) 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# 02110-1301, USA.
-"""Module for a simple query language"""
+"""Module for a simple query language
+
+A query filter is always a list. The first item in the list is the operator
+(e.g. C{[OP_AND, ...]}), while the other items depend on the operator. For
+logic operators (e.g. L{OP_AND}, L{OP_OR}), they are subfilters whose results
+are combined. Unary operators take exactly one other item (e.g. a subfilter for
+L{OP_NOT} and a field name for L{OP_TRUE}). Binary operators take exactly two
+operands, usually a field name and a value to compare against. Filters are
+converted to callable functions by L{query._CompileFilter}.
+
+"""
+
+import re
+import string # pylint: disable=W0402
+import logging
+
+import pyparsing as pyp
from ganeti import errors
+from ganeti import netutils
+from ganeti import utils
+from ganeti import compat
+# Logic operators with one or more operands, each of which is a filter on its
+# own
OP_OR = "|"
+OP_AND = "&"
+
+
+# Unary operators with exactly one operand (a subfilter for OP_NOT, a field
+# name for OP_TRUE)
+OP_NOT = "!"
+OP_TRUE = "?"
+
+
+# Binary operators with exactly two operands, the field name and an
+# operator-specific value (e.g. a regular expression string for OP_REGEXP)
OP_EQUAL = "="
+OP_NOT_EQUAL = "!="
+OP_REGEXP = "=~"
+OP_CONTAINS = "=[]"
+
+
+#: Characters used for detecting user-written filters (see L{_CheckFilter});
+#: a single one of them in an argument is enough for it to be treated as a
+#: filter
+FILTER_DETECTION_CHARS = frozenset("()=/!~'\"\\" + string.whitespace)
+
+#: Characters used to detect globbing filters (see L{_CheckGlobbing})
+GLOB_DETECTION_CHARS = frozenset("*?")
-def ReadSimpleFilter(namefield, filter_):
- """Function extracting wanted names from restricted filter.
+def MakeSimpleFilter(namefield, values):
+ """Builds a simple filter.
+
+ The result is an L{OP_OR} filter containing one L{OP_EQUAL} condition on
+ C{namefield} per entry of C{values}.
+
- This should only be used until proper filtering is implemented. The filter
- must either be empty or of the format C{["|", ["=", field, "name1"], ["=",
- field, "name2"], ...]}.
+ @param namefield: Name of field containing item name
+ @param values: List of names
+ @rtype: list or None
+ @return: Query filter, or C{None} if no names were given
"""
- if filter_ is None:
- return []
+ if values:
+ return [OP_OR] + [[OP_EQUAL, namefield, i] for i in values]
- if not isinstance(filter_, list):
- raise errors.ParameterError("Filter should be list")
+ return None
- if not filter_ or filter_[0] != OP_OR:
- raise errors.ParameterError("Filter should start with OR operator")
- if len(filter_) < 2:
- raise errors.ParameterError("Invalid filter, OR operator should have"
- " operands")
+def _ConvertLogicOp(op):
+ """Creates parsing action function for logic operator.
- result = []
+ @type op: string
+ @param op: Operator for data structure, e.g. L{OP_AND}
+ @rtype: callable
+ @return: Function taking the parser tokens and returning the query operator
+ structure; L{BuildFilterParser} registers it as a parse action
- for idx, item in enumerate(filter_[1:]):
- if not isinstance(item, list):
- raise errors.ParameterError("Invalid OR operator, operand %s not a"
- " list" % idx)
+ """
+ def fn(toks):
+ """Converts parser tokens to query operator structure.
- if len(item) != 3 or item[0] != OP_EQUAL:
- raise errors.ParameterError("Invalid OR operator, operand %s is not an"
- " equality filter" % idx)
+ @param toks: Parser tokens; the first item holds all operands
+ @rtype: list
+ @return: Query operator structure, e.g. C{[OP_AND, ["=", "foo", "bar"]]}
- (_, name, value) = item
+ """
+ operands = toks[0]
- if not isinstance(value, basestring):
- raise errors.ParameterError("Operand %s for OR should compare against a"
- " string" % idx)
+ # A single operand is returned as it is, without wrapping it in the operator
+ if len(operands) == 1:
+ return operands[0]
- if name != namefield:
- raise errors.ParameterError("Operand %s for OR should filter field '%s',"
- " not '%s'" % (idx, namefield, name))
+ # Build query operator structure
+ # NOTE(review): the outer list presumably keeps the whole structure as one
+ # token in the parse results -- confirm against pyparsing's parse action
+ # semantics
+ return [[op] + operands.asList()]
- result.append(value)
+ return fn
- return result
+#: Characters accepted as delimiters around a regular expression in a filter
+_KNOWN_REGEXP_DELIM = "/#^|"
+#: Regular expression flags accepted after the closing delimiter
+_KNOWN_REGEXP_FLAGS = frozenset("si")
-def MakeSimpleFilter(namefield, values):
- """Builds a filter for use with L{ReadSimpleFilter}.
- @param namefield: Name of field containing item name
- @param values: List of names
+def _ConvertRegexpValue(_, loc, toks):
+ """Regular expression value for condition.
+
+ Registered as a parse action by L{BuildFilterParser}. Checks the flags
+ against L{_KNOWN_REGEXP_FLAGS}, prepends them to the expression as an inline
+ C{(?...)} group and verifies that the result compiles.
+
+ @param loc: Location passed on to the raised exception
+ @param toks: Parser tokens; the first item is the pair of expression and
+ flags
+ @rtype: list
+ @return: List with the regular expression string as its only item
+ @raise pyp.ParseFatalException: If unknown flags are used or the expression
+ does not compile
"""
- if values:
- return [OP_OR] + [[OP_EQUAL, namefield, i] for i in values]
+ (regexp, flags) = toks[0]
- return None
+ # Ensure only whitelisted flags are used
+ unknown_flags = (frozenset(flags) - _KNOWN_REGEXP_FLAGS)
+ if unknown_flags:
+ raise pyp.ParseFatalException("Unknown regular expression flags: '%s'" %
+ "".join(unknown_flags), loc)
+
+ # Flags are sorted so the same set of flags always gives the same prefix
+ if flags:
+ re_flags = "(?%s)" % "".join(sorted(flags))
+ else:
+ re_flags = ""
+
+ re_cond = re_flags + regexp
+
+ # Test if valid; the compiled object is discarded, only the string is kept
+ try:
+ re.compile(re_cond)
+ except re.error, err:
+ raise pyp.ParseFatalException("Invalid regular expression (%s)" % err, loc)
+
+ return [re_cond]
+
+
+def BuildFilterParser():
+ """Builds a parser for query filter strings.
+
+ The grammar knows the following conditions, which can be combined using
+ C{not}, C{and} and C{or}:
+ - C{field}: boolean condition (L{OP_TRUE})
+ - C{field == value}, C{field != value}: comparison with an integer or a
+ quoted string
+ - C{value in field}, C{value not in field}: L{OP_CONTAINS}
+ - C{field =~ m/regexp/flags}, C{field !~ m/regexp/flags}: L{OP_REGEXP}
+ - C{field =* "glob"}, C{field !* "glob"}: globbing pattern, converted to
+ L{OP_REGEXP} using L{utils.DnsNameGlobPattern}
+
+ @rtype: pyparsing.ParserElement
+ @return: Parser matching a whole filter string
+
+ """
+ # Field names start with a letter, followed by letters, digits, "_", "/"
+ # or "."
+ field_name = pyp.Word(pyp.alphas, pyp.alphanums + "_/.")
+
+ # Integer
+ num_sign = pyp.Word("-+", exact=1)
+ number = pyp.Combine(pyp.Optional(num_sign) + pyp.Word(pyp.nums))
+ number.setParseAction(lambda toks: int(toks[0]))
+
+ # Quoted string, with the surrounding quotes removed from the result
+ quoted_string = pyp.quotedString.copy().setParseAction(pyp.removeQuotes)
+
+ # Right-hand-side value
+ rval = (number | quoted_string)
+
+ # Boolean condition (a field name on its own)
+ bool_cond = field_name.copy()
+ bool_cond.setParseAction(lambda (fname, ): [[OP_TRUE, fname]])
+
+ # Simple binary conditions; maps the operator as written in the filter
+ # string to the operator used in the query data structure
+ binopstbl = {
+ "==": OP_EQUAL,
+ "!=": OP_NOT_EQUAL,
+ }
+
+ binary_cond = (field_name + pyp.oneOf(binopstbl.keys()) + rval)
+ binary_cond.setParseAction(lambda (lhs, op, rhs): [[binopstbl[op], lhs, rhs]])
+
+ # "in" condition; the value is written first, but the resulting structure
+ # has the field name first like the other binary operators
+ # NOTE(review): "in" is given to pyp.Suppress as a plain string rather than
+ # pyp.Keyword, so it is presumably matched without requiring a word boundary
+ # -- confirm whether that is intended
+ in_cond = (rval + pyp.Suppress("in") + field_name)
+ in_cond.setParseAction(lambda (value, field): [[OP_CONTAINS, field, value]])
+
+ # "not in" condition
+ not_in_cond = (rval + pyp.Suppress("not") + pyp.Suppress("in") + field_name)
+ not_in_cond.setParseAction(lambda (value, field): [[OP_NOT, [OP_CONTAINS,
+ field, value]]])
+
+ # Regular expression, e.g. m/foobar/i; the leading "m" is optional, the
+ # delimiter must be one of _KNOWN_REGEXP_DELIM and the flags default to an
+ # empty string
+ regexp_val = pyp.Group(pyp.Optional("m").suppress() +
+ pyp.MatchFirst([pyp.QuotedString(i, escChar="\\")
+ for i in _KNOWN_REGEXP_DELIM]) +
+ pyp.Optional(pyp.Word(pyp.alphas), default=""))
+ regexp_val.setParseAction(_ConvertRegexpValue)
+ regexp_cond = (field_name + pyp.Suppress("=~") + regexp_val)
+ regexp_cond.setParseAction(lambda (field, value): [[OP_REGEXP, field, value]])
+
+ # Negated regular expression, expressed as OP_NOT around OP_REGEXP
+ not_regexp_cond = (field_name + pyp.Suppress("!~") + regexp_val)
+ not_regexp_cond.setParseAction(lambda (field, value):
+ [[OP_NOT, [OP_REGEXP, field, value]]])
+
+ # Globbing, e.g. name =* "*.site"; the pattern is converted to a regular
+ # expression while parsing
+ glob_cond = (field_name + pyp.Suppress("=*") + quoted_string)
+ glob_cond.setParseAction(lambda (field, value):
+ [[OP_REGEXP, field,
+ utils.DnsNameGlobPattern(value)]])
+
+ not_glob_cond = (field_name + pyp.Suppress("!*") + quoted_string)
+ not_glob_cond.setParseAction(lambda (field, value):
+ [[OP_NOT, [OP_REGEXP, field,
+ utils.DnsNameGlobPattern(value)]]])
+
+ # All possible conditions; "^" makes pyparsing try every alternative and use
+ # the longest match, so a bare field name does not shadow longer conditions
+ # starting with a field name
+ condition = (binary_cond ^ bool_cond ^
+ in_cond ^ not_in_cond ^
+ regexp_cond ^ not_regexp_cond ^
+ glob_cond ^ not_glob_cond)
+
+ # Associativity operators ("not" is unary and right-associative, "and" and
+ # "or" are binary and left-associative)
+ filter_expr = pyp.operatorPrecedence(condition, [
+ (pyp.Keyword("not").suppress(), 1, pyp.opAssoc.RIGHT,
+ lambda toks: [[OP_NOT, toks[0][0]]]),
+ (pyp.Keyword("and").suppress(), 2, pyp.opAssoc.LEFT,
+ _ConvertLogicOp(OP_AND)),
+ (pyp.Keyword("or").suppress(), 2, pyp.opAssoc.LEFT,
+ _ConvertLogicOp(OP_OR)),
+ ])
+
+ # The expression must span the whole input string
+ parser = pyp.StringStart() + filter_expr + pyp.StringEnd()
+ parser.parseWithTabs()
+
+ # Originally C{parser.validate} was called here, but there seems to be some
+ # issue causing it to fail whenever the "not" operator is included above.
+
+ return parser
+
+
+def ParseFilter(text, parser=None):
+ """Parses a query filter.
+
+ @type text: string
+ @param text: Query filter
+ @type parser: pyparsing.ParserElement
+ @param parser: Pyparsing object; if C{None}, a new parser is built using
+ L{BuildFilterParser}
+ @rtype: list
+ @return: First item of the parse result, i.e. the query filter structure
+ @raise errors.QueryFilterParseError: If the text can not be parsed
+
+ """
+ logging.debug("Parsing as query filter: %s", text)
+
+ # Without a parser from the caller a new one is built on every call
+ if parser is None:
+ parser = BuildFilterParser()
+
+ try:
+ return parser.parseString(text)[0]
+ except pyp.ParseBaseException, err:
+ # The original pyparsing exception is passed along as second argument
+ raise errors.QueryFilterParseError("Failed to parse query filter"
+ " '%s': %s" % (text, err), err)
+
+
+def _IsHostname(text):
+ """Checks if a string could be a hostname.
+
+ The string is accepted if L{netutils.Hostname.GetNormalizedName} does not
+ raise L{errors.OpPrereqError} for it.
+
+ @type text: string
+ @param text: String to check
+ @rtype: bool
+
+ """
+ try:
+ netutils.Hostname.GetNormalizedName(text)
+ except errors.OpPrereqError:
+ return False
+ else:
+ return True
+
+
+def _CheckFilter(text):
+ """Checks if a string could be a filter.
+
+ @type text: string
+ @param text: String to check
+ @rtype: bool
+ @return: Whether the string contains at least one character from
+ L{FILTER_DETECTION_CHARS}
+
+ """
+ return bool(frozenset(text) & FILTER_DETECTION_CHARS)
+
+
+def _CheckGlobbing(text):
+ """Checks if a string could be a globbing pattern.
+
+ @type text: string
+ @param text: String to check
+ @rtype: bool
+ @return: Whether the string contains at least one character from
+ L{GLOB_DETECTION_CHARS}
+
+ """
+ return bool(frozenset(text) & GLOB_DETECTION_CHARS)
+
+
+def _MakeFilterPart(namefield, text):
+ """Generates filter for one argument.
+
+ @param namefield: Name of field to compare against
+ @type text: string
+ @param text: Name or globbing pattern
+ @rtype: list
+ @return: L{OP_REGEXP} condition if the text looks like a globbing pattern
+ (see L{_CheckGlobbing}), L{OP_EQUAL} condition otherwise
+
+ """
+ if _CheckGlobbing(text):
+ return [OP_REGEXP, namefield, utils.DnsNameGlobPattern(text)]
+ else:
+ return [OP_EQUAL, namefield, text]
+
+
+def MakeFilter(args, force_filter):
+ """Try to make a filter from arguments to a command.
+
+ If the name could be a filter it is parsed as such. If it's just a globbing
+ pattern, e.g. "*.site", such a filter is constructed. As a last resort the
+ names are treated just as a plain name filter.
+
+ Automatic detection of a filter is only done if exactly one argument is
+ given (see L{_CheckFilter}). Names and globbing patterns are always compared
+ against the field C{"name"}.
+
+ @type args: list of string
+ @param args: Arguments to command
+ @type force_filter: bool
+ @param force_filter: Whether to force treatment as a full-fledged filter
+ @rtype: list
+ @return: Query filter, or C{None} if there are no arguments and no filter
+ was forced
+ @raise errors.OpPrereqError: If a filter is forced, but the number of
+ arguments is not exactly one
+
+ """
+ if (force_filter or
+ (args and len(args) == 1 and _CheckFilter(args[0]))):
+ # Unpacking fails unless there is exactly one argument, which can only
+ # happen here if the filter was forced
+ try:
+ (filter_text, ) = args
+ except (TypeError, ValueError):
+ raise errors.OpPrereqError("Exactly one argument must be given as a"
+ " filter")
+
+ result = ParseFilter(filter_text)
+ elif args:
+ # One condition per argument, any of which may match
+ result = [OP_OR] + map(compat.partial(_MakeFilterPart, "name"), args)
+ else:
+ result = None
+
+ return result