#!/usr/bin/python3
"""Generate CSV from some Markdown with RFC-822 headers in it.

I wrote a file for a survey of parsers, and this script puts its
regular part into CSV form.

The regular part that parse() recognizes looks like this::

    Item title
    ----------
    Header-Name: value
      indented lines continue the previous value
    Other-Header: value

Each item becomes one CSV row; each distinct header name becomes one
CSV column.
"""
import csv
import sys


def parse(infile):
    """Extract titled items and their header fields from *infile*.

    *infile* is any iterable of text lines (an open file, a list of
    strings, ...).  A line made only of four or more dashes starts a new
    item whose title is the line immediately before it.  Following lines
    of the form ``Name: value`` (no space inside ``Name``) are stored as
    that item's headers; a line that starts with a space or tab continues
    the most recent header's value.  Any other non-blank line ends the
    item's header section.

    Header names are matched case-insensitively: the spelling seen first
    anywhere in the input is used as the key for every later occurrence.

    Returns a list of ``(title, fields)`` tuples in input order, where
    ``fields`` is a dict mapping header name to value.  ``title`` is
    ``None`` when the dash line is the very first line of the input.
    """
    prev_line = None
    items = []
    seeking_headers = False
    canonical_header_names = {}   # lower-cased name -> first spelling seen
    current_header_name = None    # header that a continuation line extends

    for raw_line in infile:
        line = raw_line.strip()

        if len(line) > 3 and all(c == '-' for c in line):
            # Underline of a title: open a new item.
            items.append((prev_line, {}))
            seeking_headers = True
            # The new item has no headers yet, so there is nothing for a
            # continuation line to extend.  Without this reset a stale
            # name from the previous item would be looked up in the new,
            # empty dict and raise KeyError.
            current_header_name = None
        elif seeking_headers and ': ' in line:
            # Checked before the continuation case, so an indented line
            # that contains ': ' is treated as a header, not a continuation.
            name, value = line.split(': ', 1)
            if ' ' in name:
                # "Some words: ..." is prose, not a header.
                seeking_headers = False
                current_header_name = None
            else:
                current_header_name = canonical_header_names.setdefault(
                    name.lower(), name)
                items[-1][1][current_header_name] = value.strip()
        elif (seeking_headers and current_header_name is not None
              and raw_line.startswith((' ', '\t'))):
            # Continuation line.  startswith() rather than raw_line[0] so
            # that an empty string in the input cannot raise IndexError.
            items[-1][1][current_header_name] += ' ' + line
        elif line:
            # Any other non-blank line ends the header section; blank
            # lines leave the state untouched.
            seeking_headers = False
            current_header_name = None

        # Remembered for every line, blank ones included, so a title is
        # always the line directly above its dash underline.
        prev_line = line

    return items


def dump(items, outfile):
    """Write *items* (as returned by parse()) to *outfile* as CSV.

    The first row is ``Name`` followed by every header name that occurs
    in any item, sorted.  Each later row holds one item's title and its
    value for each of those headers, with an empty cell where the item
    lacks the header.
    """
    w = csv.writer(outfile)
    headings = tuple(sorted({name
                             for _title, fields in items
                             for name in fields}))

    w.writerow(('Name',) + headings)
    for title, fields in items:
        w.writerow((title,) + tuple(fields.get(field, '')
                                    for field in headings))


def main(infilename):
    """Parse the file named *infilename* and print it as CSV on stdout."""
    # The with-block closes the input file even if parsing fails.
    with open(infilename) as infile:
        items = parse(infile)
    dump(items, sys.stdout)


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('usage: {} MARKDOWN_FILE'.format(sys.argv[0]))
    main(sys.argv[1])