#!/usr/bin/python
# -*- coding: utf-8 -*-
"""A spike to see about font cascades with a fallback in Reportlab.

The issue is that in Dercuano I'm using ET Book
(etbook/et-book-roman-old-style-figures.ttf and friends) and I use a
lot of characters that are not in ET Book.  So for those characters I
need to switch to a different font, such as FreeSerif.

Fundamentally this is not too terrible: I need to iterate over the
characters of the string I want to draw, switching fonts
appropriately.  This also requires me to do the same switcheroo for
string width in order to fill paragraphs properly.
"""
import random
import sys
from itertools import groupby
from operator import itemgetter

from reportlab.pdfgen.canvas import Canvas
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.cidfonts import UnicodeCIDFont


class Cascade:
    """A font cascade is similar to a font (modulo point size) with fallback.

    For code points not in the most preferred font, it falls back to
    less-preferred fonts, finally using the fallback font if nothing
    else has it.

    To do this, it must be able to figure out if a font has a given
    code point or not, which I don't know how to do for arbitrary
    Reportlab fonts.  But I do know how to do it for TrueType fonts,
    so those are the ones you can use in the cascade list.
    """

    # Both memo dicts are emptied wholesale once they hold more than
    # this many entries, which keeps memory bounded without LRU
    # bookkeeping.
    _CACHE_LIMIT = 1024

    def __init__(self, fonts, fallback):
        """Parameter ``fonts`` is a TTFont list, which have .face.charToGlyph.

        Parameter ``fallback`` can be any Reportlab font; it will be
        used peremptorily for any codepoint not found in ``fonts``.
        """
        self.fonts = fonts
        self.fallback = fallback
        self.string_cache = {}  # string -> tuple of (font, substring)
        self.char_cache = {}    # single character -> chosen font

    def _remember(self, cache, key, value):
        "Store ``value`` under ``key``, emptying ``cache`` first if oversized."
        if len(cache) > self._CACHE_LIMIT:
            cache.clear()
        cache[key] = value

    def _choose_font(self, char):
        "Return the first cascade font containing ``char``, else the fallback."
        code_point = ord(char)
        for font in self.fonts:
            if code_point in font.face.charToGlyph:
                return font
        return self.fallback

    def find_fonts(self, string):
        """Map a string of chars to (char, font) pairs.

        This is a generator; it yields one pair per character of
        ``string``, in order.  The per-character decision is memoized
        in ``self.char_cache``, including the decision to use the
        fallback font, so a character missing from every cascade font
        is only searched for once rather than on every occurrence.
        """
        char_cache = self.char_cache
        for char in string:
            try:
                font = char_cache[char]
            except KeyError:
                font = self._choose_font(char)
                self._remember(char_cache, char, font)
            yield char, font

    def _map(self, string):
        "Uncached version of ``map``: group adjacent chars sharing a font."
        # groupby only merges *consecutive* items with equal keys, which
        # is exactly the run-splitting wanted here: a new (font,
        # substring) pair starts every time the chosen font changes.
        runs = groupby(self.find_fonts(string), key=itemgetter(1))
        return tuple((font, ''.join(char for char, _ in run))
                     for font, run in runs)

    def map(self, string):
        """Map a string to a tuple of (font, substring) pairs.

        Concatenating the substrings in order gives back ``string``.
        Results are memoized per string in ``self.string_cache``.
        """
        if string not in self.string_cache:
            self._remember(self.string_cache, string, self._map(string))
        return self.string_cache[string]

    def width(self, string, size):
        """Equivalent of font.stringWidth or canvas.stringWidth.

        Returns the sum of each run's ``stringWidth`` in its own font
        at ``size``; an empty string gives 0.
        """
        return sum(font.stringWidth(run, size)
                   for font, run in self.map(string))

    def textOut(self, textobject, string, size):
        """Equivalent of textobject.textOut.

        Switches ``textobject`` to each run's font (by ``fontName``) at
        ``size`` before emitting that run, and prints a trace line to
        stdout for every run.
        """
        for font, run in self.map(string):
            print(u"Setting '%s' in %s" % (run, font.fontName))
            textobject.setFont(font.fontName, size)
            textobject.textOut(run)


def main():
    """Write ``hellocascade.pdf``, a small demo page set with a Cascade.

    The page holds three fixed sample lines followed by a paragraph of
    30 randomly chosen words, greedily broken into lines using
    ``Cascade.width``.  The random seed is ``sys.argv[1]`` when given,
    otherwise 1.
    """
    page_width = 72*3
    page_height = 72*2
    size = 12

    c = Canvas('hellocascade.pdf', pagesize=(page_width, page_height),
               invariant=True)
    s = [u'⸘eh‽ Liberation Serif covers ≈, †, ∞,',
         u'←↓↑→, ² and ³, and Greek, KΚορη, but',
         u'not ⁻⁶ or ɑ or ₂ or ⁴⁸ or ℤ. Whee! 你好!']

    f = TTFont('ET-Book-Roman', 'etbook/et-book-roman-old-style-figures.ttf')
    pdfmetrics.registerFont(f)
    # DejaVuSerif is registered here but is not part of the cascade
    # constructed below.
    fallback0 = TTFont('DejaVuSerif',
                       '/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf')
    pdfmetrics.registerFont(fallback0)
    fallback1 = TTFont('FreeSerif',
                       '/usr/share/fonts/truetype/freefont/FreeSerif.ttf')
    pdfmetrics.registerFont(fallback1)
    stsong_light = UnicodeCIDFont('STSong-Light')
    pdfmetrics.registerFont(stsong_light)

    cascade = Cascade([f, fallback1], stsong_light)

    c.setFont(f.fontName, size)
    t = c.beginText(6, page_height - 18)
    for line in s:
        print(u"Width of '%s' is %s." % (line, cascade.width(line, size=size)))
        cascade.textOut(t, line, size=size)
        t.textLine()

    words = [u'πορτοκαλη', u'Περσεφόνη', u'Άδης', u'ουχ', u'ο', u'η',
             u'βούλομαι', u'ἐλεύσομαι', u'Σοκρατης', u'Πλατον',
             u'Διογηνης', u'τα', u'είναι', u'μία',
             u'你好', u'我们', u'不',
             u'boludo', u'estás', u'en', u'el', u'ñoqui', u'pelotuda',
             u'qué', u'carajo', u'sos', u'hadix',
             u'颱風', u'天气', u'水',
             u'ℤ', u'⇒', u'∃', u'∈', u'†', u'2.5ℓ', u'v⃗',
             # Enough Farsi to see that RtL and Arabic ligatures are totally
             # not handled:
             u'ن سینا‎', u'الجبر‎', u'محمد بن',
             # Devanagari, including combining vowels, seems to be reasonably
             # okay, though the vertical alignment leaves something to be desired:
             u'जुड़े', u'से', u'अल्गोरिद्म']

    generator = random.Random(int(sys.argv[1]) if len(sys.argv) > 1 else 1)
    line = ' '
    capitalize = True  # True at the start and right after a full stop
    for _ in range(30):
        word = generator.choice(words)
        if capitalize:
            word = word.capitalize()
            capitalize = False

        # Roughly one word in five ends a sentence.
        if not generator.randrange(5):
            word += '. '
            capitalize = True

        possible = line + ' ' + word
        # Greedy line fill: flush the current line as soon as adding
        # the next word would exceed the page width less 12 points.
        if cascade.width(possible, size=size) > page_width - 12:
            cascade.textOut(t, line, size=size)
            t.textLine()
            line = word
        else:
            line = possible

    # Close the final sentence unless the last word already did.
    cascade.textOut(t, line if capitalize else line + '.', size=size)

    c.drawText(t)
    c.save()


if __name__ == '__main__':
    main()