#!/usr/bin/python # -*- coding: utf-8 -*- """Batch (i.e. noninteractive) Lotus Agenda clone. Usage: %s [--html] tagfile itemfile [view definition] This combines a file of items with a file of tags (“categories”, as Agenda called them) to produce either a file of tagged items or a view. To read the items from stdin, use "-" for the itemfile. Agenda’s way of handling tags is somewhat more complex than the approaches more common today, and this program complexifies it further. Tags are arranged in a tree (here actually a DAG), with membership in a child tag logically implying membership in its parent tag, and may also be inferred from the content of items. Also, some parents are marked as “mutually exclusive”, so an item can only be tagged (even implicitly) with up to one of its children. The item file contains one item per line. (Agenda had a facility to associate a blob, called a “note”, with each item. This program doesn’t support notes yet.) Items can be explicitly tagged with a Twitter-style hashtag on the line (“#todo”), or they can acquire their tags implicitly from the tag file. The tag name can be followed a value preceded by an “=”, and the value can either be enclosed in quotes or contain no spaces. By default, this program produces a fully-explicitly-tagged version of the input item file, which is useful mostly for debugging. The tag file contains a hierarchy of tags, one per line, with the hierarchy represented with indentation. Tag names are unique, so if you use the same tag in more than one place, it has multiple parent categories. This can also be useful for keeping your indentation from getting too deep. For example, this tag file has three immediate subcategories under “food”, one of which has two subcategories: food fruit bread milk fruit apple banana By default, if the name of a tag occurs (case-insensitively) in an item, that item is automatically tagged with that tag. You can suppress this implicit content-based tagging by preceding the tag in the tag file with a “#”; in that case the tag in the item file will also have to be preceded with an explicit “#” in order to be tagged. This suppression is handy for tags that are also common words. You can supply additional space-separated synonyms that will cause a tag to be implicitly applied to a line. For example, the tag definition ”#xx female woman” will apply the tag “#xx” to lines containing “female” or “woman”. A synonym can be a hashtag marked with a “#”, in which case it will apply even if that tag was inferred, rather than being explicitly present in the input. For more powerful implicit tagging, you can also specify Python regular expressions between “//” delimiters. If the regular expression contains a “()” capturing group, the captured content will be used as the value of the tag. For example, the tag definition “price / \$([\d.]+)/” will apply the tag “#price=53.33” to the line “tickets $53.33”. To make a tag’s children mutually exclusive, put a colon (“:”) after the tag name. This ensures that implicit tagging will not apply any further child tags to that item once one of them has been applied, either implicitly or explicitly. They are considered in order, so you should place less specific implicit rules later. Finally, for a tag to be implicitly inherited from one item to all following items until it’s overridden (either by mutual exclusion or by a new value being found), put an “*” at the end of the tag name. This can be either before or after the “:”, if any. To summarize, the tag file can specify - tag names - parent tags (by indentation) - mutual exclusion (by “:” on the end of the tag name) - implicit inheritance to all following lines (by “*” on the end of the tag name) - disabling the default implicit tagging (by “#” on the beginning of the tag name) - synonyms for implicit tagging (space-separated words) - regular expressions for implicit tagging, including content capture For more useful output than merely a fully-tagged copy of the input, which might still be kind of useful for grepping, you can define a view with the following magic notation after the name of the item file on the command line: - “tag” specifies to omit all items not tagged with “tag”. - “-tag” specifies to omit all items tagged with “tag”. - “tag”, “=”, “<=”, or “>=” in place of “<”. Note that in most cases you will need to put quotes around these queries on the command line to keep the shell from interpreting them as I/O redirections. - “^tag” specifies to sort the output by the value of “tag”, which will be displayed as a column. This is similar to, but not quite the same as “=”: - “=tag” specifies to divide the output into sections that have the same value of “tag”, with a header line above each one. - “@tag” specifies to display the value of “tag” as a column in the output, with a total at the bottom, if its values are all numeric. If the output is divided into sections, each section will have a subtotal. BUGS: (including unimplemented features) - range queries probably ought to do numeric comparisons if both the key and the tag value are numeric - range queries probably ought to exclude items that don’t have the specified tag - text synonyms aren’t handled yet (though regexps and synonym tags are, including multiple ones) - HTML output doesn’t cover the view-definition-less case yet - views don’t yet support sort-by queries (^) - handles Unicode tags unreliably due to limited list of “letters” - doesn’t yet support disabling implicit tagging (leading # in tag file) - it’s kind of slow, although not quite as bad as before - HTML output doesn’t cover the view-definition-less case yet - inherited tags don’t bring along their ancestors - inherited tags should probably cause their descendants to be inherited too - section totals aren’t optional - tags with values can only have one value (while you can quite reasonably have multiple subtags) - if you have multiple subtags in a field you define a column for, they are displayed on the same line - datetime fields like .month and .day aren’t supported yet - you can’t yet set multiple fields in a single regexp - there’s no way to get item numbers (line numbers) - there’s no way to suppress the original items from the view - no way to use a text search condition other than adding it to the schema as a tag - maybe inheritance to the next line should be specified by the view rather than the schema? """ import cgitb import re import sys import nestedtables def main(argv): reload(sys) # WTF WTF WTF sys.setdefaultencoding('utf-8') # have to do this after reload(sys) cgitb.enable(format='text') if len(argv) > 1 and argv[1] == '--html': argv.pop(1) html = True else: html = False if len(argv) < 3: sys.stderr.write(__doc__ % argv[0]) return -1 tags = parse(open(argv[1])) if len(argv) > 3: view = make_view(argv[3:], tags, html) else: view = None if view is not None: view.open() with open_items(argv[2]) as infile: for item in infile: while item.endswith('\n'): item = item[:-1] item = item.decode('utf-8') tagdata = tags.tag(item) if view is None: sys.stdout.write(render(item, tagdata) + '\n') else: view.see_item(item, tagdata) if view is not None: view.close() def open_items(name): if name == '-': return StdinContextManager() else: return open(name) class StdinContextManager: def __enter__(self): return sys.stdin def __exit__(*args): pass def make_view(terms, schema, html): return View(terms, schema, html) class View: def __init__(self, terms, schema, html): self.schema = schema self.html = html self.required = [] self.excluded = [] self.ranges = [] self.sorts = [] self.section_tags = [] self.totals = [] self.by_section = {} for term in terms: term = term.decode('utf-8') if term.startswith('-'): self.excluded.append(schema.validate_tag(term[1:])) elif term.startswith('^'): self.sorts.append(schema.validate_tag(term[1:])) elif term.startswith('='): self.section_tags.append(schema.validate_tag(term[1:])) elif term.startswith('@'): self.totals.append(schema.validate_tag(term[1:])) elif '<' in term or '>' in term or '=' in term: self.ranges.append(self.parse_range_query(term)) else: self.required.append(schema.validate_tag(term)) def parse_range_query(self, s): schema = self.schema if '<=' in s: tag, key = s.split('<=') tag = schema.validate_tag(tag) return lambda tagdata: schema.tag_value(tagdata, tag) <= key elif '<' in s: tag, key = s.split('<') tag = schema.validate_tag(tag) return lambda tagdata: schema.tag_value(tagdata, tag) < key elif '>=' in s: tag, key = s.split('>=') tag = schema.validate_tag(tag) return lambda tagdata: schema.tag_value(tagdata, tag) >= key elif '>' in s: tag, key = s.split('>') tag = schema.validate_tag(tag) return lambda tagdata: schema.tag_value(tagdata, tag) > key elif '=' in s: tag, key = s.split('=') tag = schema.validate_tag(tag) return lambda tagdata: schema.tag_value(tagdata, tag) == key else: raise UnparsableRangeQuery(s) def see_item(self, item, tagdata): if not self.matches(tagdata): return section = tuple(self.schema.tag_value(tagdata, tag) for tag in self.section_tags) # When not sectioned or sorted, output immediately. if not section: self.output_item(item, tagdata) else: if section not in self.by_section: self.by_section[section] = [] self.by_section[section].append((item, tagdata)) def output_item(self, item, tagdata): row = [] for ii, tag in enumerate(self.totals): value = self.schema.tag_value(tagdata, tag) row.append(value) if self.total_quantities[ii] is None or value == '': continue try: total_quantity = float(value) except ValueError: self.total_quantities[ii] = None else: self.total_quantities[ii] += total_quantity row.append(item) # Maybe store sort keys and items somewhere besides # current_table? If we generated the headers at the end, we # could also omit empty columns. self.current_table.append(row) def matches(self, tagdata): return (all(term in tagdata for term in self.required) and not any(term in tagdata for term in self.excluded) and all(matcher(tagdata) for matcher in self.ranges)) def open(self): if self.html: sys.stdout.write(""" batchagenda out\n""") if not self.section_tags: self.open_section() def open_section(self, name=None, count=None): if name is not None: sys.stdout.write('\n%s%s:\n' % (' '.join(v for v in name if v != ''), '' if count is None else " (%d)" % count)) self.total_quantities = [0] * len(self.totals) if self.totals: self.current_table = [self.totals + ['']] else: self.current_table = [] def close(self): if not self.section_tags: self.close_section() else: for section in sorted(self.by_section): items = self.by_section[section] self.open_section(section, len(items)) for item, tagdata in items: self.output_item(item, tagdata) self.close_section() if self.html: sys.stdout.write("""\n""") def close_section(self): if any(quantity is not None for quantity in self.total_quantities): self.current_table.append([format_total(quantity) for quantity in self.total_quantities] + ['']) self.render(self.current_table) def render(self, table): if self.html: sys.stdout.write(nestedtables.html_render(table)) else: for line in nestedtables.render(table): sys.stdout.write(line.rstrip() + '\n') def format_total(total): if total is None: return '' if total == int(total): return '%d' % total return '%s' % total class NonexistentTag(Exception): pass class UnparsableRangeQuery(Exception): pass def parse(tagfile): schema = Schema() indents = [] parents = [] wsp = re.compile(r'\s*') word = re.compile(ur'[-+\wüóéí]+') # XXX include other letters! rere = re.compile(r'/((?:[^\\/]|\\.)*)/') synonymre = re.compile(ur'#([-\wüóéí]+)') for line in tagfile: line = line.decode('utf-8') tagname_mo = word.search(line) if not tagname_mo: continue # blank line, basically tagname = tagname_mo.group(0) indent = wsp.match(line).group(0) if indent in indents: depth = indents.index(indent) indents = indents[:depth+1] parents = parents[:depth] + [tagname] elif indents == [] or indent.startswith(indents[-1]): indents.append(indent) parents.append(tagname) else: raise TagSyntaxError("bad indent", line, indents) schema.add_tag(tagname) if len(parents) > 1: schema.add_parent(tagname, parents[-2]) if line[:tagname_mo.start()].endswith('#'): schema.make_nonimplicit(tagname) tail = line[tagname_mo.end():] if tail.startswith(':*') or tail.startswith('*'): schema.make_inherited(tagname) if tail.startswith('*:') or tail.startswith(':'): schema.make_mutually_exclusive(tagname) for pattern in rere.finditer(line): schema.add_pattern(tagname, pattern.group(1)) for synonym_tag in synonymre.finditer(line): schema.add_synonym_tag(tagname, synonym_tag.group(1)) return schema class TagSyntaxError(Exception): pass class Schema: def __init__(self): self.tags = [] self._parents = {} self.patterns = {} self.inherited_tags = set() self.mutually_exclusive_tags = set() self.nonimplicit_tags = set() self.currently_inherited = {} self.synonym_tags = {} self.ancestor_cache = {} self._parent_exclusions = {} def add_tag(self, tagname): if tagname not in self.tags: self.tags.append(tagname) if tagname not in self._parents: self._parents[tagname] = set() def validate_tag(self, tag): if tag not in self.tags: raise NonexistentTag(tag) return tag def add_parent(self, tagname, parent): assert tagname not in self.ancestors(parent), (tagname, parent) self.ancestor_cache = {} self._parent_exclusions = {} self._parents[tagname].add(parent) def parents(self, tag): return self._parents[tag] def add_pattern(self, tagname, pattern): if tagname not in self.patterns: self.patterns[tagname] = [] self.patterns[tagname].append(re.compile(pattern)) def add_synonym_tag(self, tagname, synonym): # This is a misnomer. The idea is that when we tag an item # with `synonym`, we also want to add `tagname` unless there’s # some reason we shouldn’t, such as a mutually exclusive # existing tag. if self.validate_tag(tagname) not in self.synonym_tags: self.synonym_tags[tagname] = [] self.synonym_tags[tagname].append(synonym) def make_mutually_exclusive(self, tagname): self.mutually_exclusive_tags.add(tagname) def make_inherited(self, tagname): self.inherited_tags.add(tagname) def make_nonimplicit(self, tagname): self.nonimplicit_tags.add(tagname) def tag_value(self, tagdata, tag): "Returns a string value for `tag` from `tagdata`." if tagdata.get(tag) is not None: return tagdata[tag] return ' '.join(sorted(subtag for subtag in tagdata if tag in self.parents(subtag))) def ancestors(self, tag): try: return self.ancestor_cache[tag] except KeyError: pass ancestors = {} pending = set([tag]) while pending: ancestor = pending.pop() for parent in self.parents(ancestor): if parent not in ancestors: pending.add(parent) ancestors[parent] = None self.ancestor_cache[tag] = ancestors return ancestors def parent_exclusions(self, tag): try: return self._parent_exclusions[tag] except KeyError: pass parents = self.parents(tag) rv = {p: tag for p in parents if p in self.mutually_exclusive_tags} for p in parents: rv = hash_union(rv, self.parent_exclusions(p)) self._parent_exclusions[tag] = rv return rv def tag(self, item): autotags = {} exclusions = {} for tag in self.tags: new_exclusions = self.parent_exclusions(tag) if not self.exclusion_conflict(new_exclusions, exclusions): new_tags = self.tag_with(tag, item, autotags) for new_tag in new_tags: if new_tag not in autotags: autotags[new_tag] = new_tags[new_tag] exclusions.update(self.parent_exclusions(new_tag)) # XXX also read the explicit tags for tag in self.currently_inherited: new_exclusions = self.parent_exclusions(tag) if autotags.has_key(tag): continue if self.exclusion_conflict(exclusions, new_exclusions): continue autotags[tag] = self.currently_inherited[tag] exclusions.update(new_exclusions) # XXX also inherit ancestors and descendants of self.inherited_tags self.currently_inherited = {k: autotags[k] for k in autotags if k in self.inherited_tags} return autotags def exclusion_conflict(self, a, b): return any(a[k] != b[k] for k in a.keys() if b.has_key(k)) def tag_with(self, tag, item, existing_tags): tags = {} for pattern in self.patterns.get(tag, []): mo = pattern.search(item) if mo is not None: if mo.groups(): tags[tag] = mo.group(1) else: tags[tag] = None tags.update(self.ancestors(tag)) return tags if (tag not in self.nonimplicit_tags and tag.lower() in item.lower() or any(synonym in existing_tags for synonym in self.synonym_tags.get(tag, []))): tags[tag] = None tags.update(self.ancestors(tag)) return tags def hash_union(a, b): if not a: return b elif not b: return a rv = {} rv.update(a) rv.update(b) return rv def render(item, autotags): return item + ''.join(' #'+tag if autotags[tag] is None else ' #'+tag+'='+autotags[tag] for tag in autotags if '#'+tag not in item) if __name__ == '__main__': sys.exit(main(sys.argv))