#!/usr/bin/python
"""Iterate over the lines of a seekable file object from last to first.

The file is read in fixed-size chunks starting at the end, so memory use
is bounded by the chunk size plus the longest line.
"""

import io
import re  # no longer used by BackwardLines; kept so the module namespace is unchanged

try:
    import StringIO  # Python 2 location of the in-memory text file
    _make_text_file = StringIO.StringIO
except ImportError:  # Python 3: the StringIO module was folded into io
    _make_text_file = io.StringIO


# Original author's note: instead of 74K per second, this runs at five or
# ten megabytes per second on my test files, the same speed as Python's
# built-in iterator over text file lines.
class BackwardLines(object):
    """Iterable yielding the lines of a seekable file in reverse order.

    Each yielded line keeps its trailing newline, exactly as forward
    iteration would produce it; only the final line of the file may lack
    one.  Lines are split on '\\n' only.

    fo     -- file object supporting seek(), tell() and read().  Both text
              (str) and binary (bytes) streams work; binary mode is the
              safe choice for real files, since offsets are then plain
              byte counts.
    bufsiz -- number of units requested per read; must be positive.

    Raises ValueError if bufsiz is not positive.
    """

    def __init__(self, fo, bufsiz=262144):
        if bufsiz <= 0:
            # A non-positive chunk size would never move the read position
            # backwards, so iteration could not terminate.
            raise ValueError("bufsiz must be positive, got %r" % (bufsiz,))
        self.fo = fo
        self.bufsiz = bufsiz

    def __iter__(self):
        fo = self.fo
        fo.seek(0, io.SEEK_END)
        pos = fo.tell()

        # Earliest line fragment of the chunk processed so far.  It may be
        # the end of a line that starts in the preceding chunk, so it is
        # held back until that chunk has been read.
        tail = None

        while pos > 0:
            old_pos = pos
            pos = max(0, pos - self.bufsiz)
            fo.seek(pos, io.SEEK_SET)
            buf = fo.read(old_pos - pos)  # usually bufsiz
            if not buf:
                # Nothing came back for this chunk; there is nothing to
                # split, so move on to the previous one.
                continue

            # Match the newline to the data type so that both text and
            # byte streams can be split and re-joined.
            newline = b'\n' if isinstance(buf, bytes) else u'\n'

            pieces = buf.split(newline)
            # Whatever follows the last newline of this chunk is the start
            # of the fragment carried over from the later chunk.
            last = pieces.pop()
            if tail:
                last += tail
            lines = [piece + newline for piece in pieces]
            if last:
                lines.append(last)

            # buf is non-empty here, so lines holds at least one entry.
            tail = lines.pop(0)
            for line in reversed(lines):
                yield line

        if tail:
            yield tail


test_string = '''
a
cd
efg
hijk
lmnop
qrstuv
wxyzABC
DEFGHIJK
LMNOPQRST
'''

test_string_lines = ['\n', 'a\n', 'cd\n', 'efg\n', 'hijk\n', 'lmnop\n',
                     'qrstuv\n', 'wxyzABC\n', 'DEFGHIJK\n', 'LMNOPQRST\n']

reversed_lines = list(reversed(test_string_lines))


def test():
    """Check BackwardLines against the fixture for every small chunk size."""
    for bufsiz in range(1, len(test_string) + 3):
        lines = list(BackwardLines(_make_text_file(test_string), bufsiz))
        assert lines == reversed_lines, (lines, reversed_lines, bufsiz)


if __name__ == "__main__":
    test()