#!/usr/bin/python3
"""Convert .vtt files from youtube-dl into plain text.

Reads WebVTT subtitles on standard input and writes the caption text,
one caption line per output line, to standard output.
"""
import re
import sys

# Matches any inline markup such as <c>, </c> or <00:00:01.000>.
tag = re.compile(r'<[^>]*>')


def main(infile):
    """Yield the plain caption text contained in a WebVTT stream.

    Args:
        infile: An iterable of ``str`` lines (for example an open text
            file or ``sys.stdin``).

    Yields:
        Each retained caption line followed by a separate ``'\\n'`` item,
        so the result can be passed straight to ``writelines``.

    The following input lines are dropped:

    * the header block: when the first line is the ``WEBVTT`` signature,
      everything up to the first blank line;
    * blank lines and cue timing lines (those containing ``' --> '``);
    * lines that are empty once markup has been removed;
    * a line whose text equals the previously emitted line.
    """
    lastline = None
    in_header = False
    for index, raw in enumerate(infile):
        line = raw.strip()

        if index == 0:
            # str.strip() does not treat a byte-order mark as whitespace,
            # so remove it explicitly before looking for the signature.
            line = line.lstrip('\ufeff')
            in_header = (
                line == 'WEBVTT'
                or line.startswith(('WEBVTT ', 'WEBVTT\t'))
            )

        if in_header:
            # The header block ends at the first blank line.
            if not line:
                in_header = False
            continue

        if ' --> ' in line:
            continue

        # Strip again after removing markup: tags may have been hiding
        # leading/trailing spaces, or may have been the whole line.
        line = tag.sub('', line).strip()
        if not line or line == lastline:
            continue

        lastline = line
        yield line
        yield '\n'


if __name__ == '__main__':
    sys.stdout.writelines(main(sys.stdin))