A small script to convert the HTML version of ORF Teletext into ANSI escape sequences so that you can browse teletext from your terminal.
You can also download the script here: telnettext
#!/usr/bin/env python3
#
# telnettext: ORF Teletext HTML to ANSI
# Copyright (C) 2022 Thomas Perl <m@thp.io>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
"""
This tool parses the HTML pages at text.orf.at and converts
them to ANSI escape sequences to be read in your terminal.
---------
Commands understood in --interactive/-i mode:
[channel:]000[.0] .... Navigate to channel, page, subpage
j .................... Go to next page
k .................... Go to previous page
h .................... Go to previous subpage
l .................... Go to next subpage
p .................... Go back one page in history
q .................... Exit interactive mode
Examples:
orfiii:100 ........... Go to ORFIII, page 100
100.3 ................ Go to page 100, subpage 3
orf1: ................ Switch channel to ORF1
.4 ................... Go to subpage 4
200 .................. Go to page 200, subpage 1
"""
import re
import argparse
import html.parser
import urllib.request
# Command-line interface. Arguments are parsed right here at module level;
# the resulting ``args`` namespace is read by the main loop further down.
parser = argparse.ArgumentParser(description='ORF Teletext HTML to ANSI')
parser.add_argument('page', type=int, nargs='?', default=100,
                    help='Page number (e.g. 100 or 886)')
parser.add_argument('sub', type=int, nargs='?', default=1,
                    help='Sub-page number (e.g. 1)')
# nargs='?' allows "-c" to be given without a value. Without an explicit
# const, argparse would store None in that case and the request URL would
# contain the literal text "None"; fall back to the default channel instead.
parser.add_argument('--channel', '-c', type=str, nargs='?',
                    default='orf1', const='orf1',
                    help='Channel (orf1, orf2, orfiii, sportplus)')
parser.add_argument('--interactive', '-i', action='store_true',
                    help='Interactive browsing mode')
args = parser.parse_args()
# Maps a colour name to its ANSI SGR foreground code (30-37). The matching
# background code is the foreground code plus 10.
colormap = {
    'black': 30,
    'red': 31,
    'green': 32,
    'yellow': 33,
    'blue': 34,
    'magenta': 35,
    'cyan': 36,
    'white': 37,
}
# Every colour is also reachable under a 'G'-prefixed alias ('Gred', ...)
# with the same code. The comprehension is fully evaluated before update()
# runs, so the dict is never mutated while it is being iterated, and no
# loop variables are left behind in the module namespace.
colormap.update({f'G{name}': code for name, code in colormap.items()})
class TeletextHTMLParser(html.parser.HTMLParser):
    """Print the teletext portion of an HTML page as ANSI-coloured text.

    Only markup inside ``<div id="pagewrapper">`` is rendered. Within it,
    ``<div class="line">`` starts a new output line and ``<div class="run">``
    is a span of text whose colours come from its ``data-fg`` / ``data-bg``
    attributes. All output goes to stdout via ``print``.

    Attributes:
        path: stack of ``(tag, attrs_dict)`` for the currently open elements.
        pagewrapper: True while the parser is inside the pagewrapper div.
    """

    # HTML void elements never get an end tag. They are kept off the
    # open-element stack, otherwise each one would leave a stale entry
    # behind and later end tags would be paired with the wrong element.
    _VOID_TAGS = frozenset({
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    })

    def __init__(self):
        super().__init__()
        self.path = []
        self.pagewrapper = False

    @staticmethod
    def _mosaic_char(codepoint):
        """Return the Unicode glyph for a teletext block-mosaic code.

        Accepts codes 0x20-0x3F and 0x60-0x7F. Bits 0-4 and bit 6 of the
        code select the six cells of a 2x3 block; the result is the matching
        Unicode block-sextant character (U+1FB00..U+1FB3B). The four patterns
        that have no sextant code point map to space, left half block,
        right half block and full block.

        Raises:
            ValueError: if ``codepoint`` is outside the two accepted ranges.
        """
        if not (0x20 <= codepoint <= 0x3F or 0x60 <= codepoint <= 0x7F):
            raise ValueError(codepoint)
        # Pack the six cell bits into one 0..63 pattern number.
        pattern = (codepoint & 0x1F) | ((codepoint & 0x40) >> 1)
        solid = {0: ' ', 21: '\u258c', 42: '\u2590', 63: '\u2588'}
        if pattern in solid:
            return solid[pattern]
        # The sextant block enumerates patterns 1..62 in order but skips
        # 21 and 42 (the half blocks), so shift past each one already passed.
        return chr(0x1FB00 + pattern - 1 - (pattern > 21) - (pattern > 42))

    def handle_starttag(self, tag, attrs):
        """Track the open element and emit line breaks / colour codes."""
        a = dict(attrs)
        if tag == 'div' and a.get('id') == 'pagewrapper':
            self.pagewrapper = True
        if tag not in self._VOID_TAGS:
            self.path.append((tag, a))
        if not self.pagewrapper or tag != 'div':
            return
        css_class = a.get('class')
        if css_class == 'line':
            print()  # newline
        elif css_class == 'run':
            bg = a['data-bg']
            fg = a['data-fg']
            fgi = colormap[fg]
            bgi = colormap[bg] + 10  # background code = foreground code + 10
            print(end='\033[%d;%dm' % (fgi, bgi))
            if a.get('data-flash') == 'true':
                # https://www.real-world-systems.com/docs/ANSIcode.html#bridim
                print(end='\033[5m')
            size = a.get('data-size')
            if size == 'double size':
                print(end='\033#3')  # TODO: Store + print next line with #4
            elif size == 'double width':
                print(end='\033#6')  # FIXME: Non-double-width prefix/suffix not handled
            # TODO: 'double height' is ignored; ANSI seems to not support
            # single-width, double-height

    def handle_endtag(self, tag):
        """Close the innermost open element and reset state it introduced."""
        # Void elements were never pushed (this also covers "<br/>", which
        # HTMLParser reports as a start tag followed by an end tag), and a
        # stray end tag on an empty stack has nothing to close.
        if tag in self._VOID_TAGS or not self.path:
            return
        _, a = self.path.pop()
        if tag != 'div':
            return
        if a.get('class') == 'run':
            print(end='\033[0m')  # reset colours/attributes after each run
        if a.get('id') == 'pagewrapper':
            self.pagewrapper = False
            print()  # newline

    def handle_data(self, data):
        """Print text found directly inside a run div or a link.

        When the enclosing element carries a ``data-charcode`` attribute
        (a hexadecimal code with an ``h`` suffix), the text is replaced by
        the corresponding mosaic glyph.

        Raises:
            ValueError: if ``data-charcode`` is malformed or not a mosaic code.
        """
        if not self.pagewrapper or not self.path:
            return
        tag, a = self.path[-1]
        is_run = tag == 'div' and a.get('class') == 'run'
        if not (is_run or tag == 'a'):
            return
        charcode = a.get('data-charcode')
        if charcode is not None:
            if not charcode.endswith('h'):
                raise ValueError(charcode)
            # https://text.orf.at/channel/orf1/page/886/1.html
            # https://en.wikipedia.org/wiki/Teletext_character_set
            data = self._mosaic_char(int(charcode[:-1], 16))
        print(data, end='')
# Interactive navigation syntax: optional "channel:" prefix, optional
# three-digit page number, optional ".subpage" suffix. Compiled once
# instead of on every prompt.
_COMMAND_RE = re.compile(
    r'^(?:(orf1|orf2|orfiii|sportplus)?[:])?(?:(?:(\d{3}))?(?:[.](\d+))?)?$')

page = args.page
sub = args.sub
channel = args.channel
# Visited locations as (channel, page, sub), most recent last. The channel
# is part of each entry so that "p" also undoes a channel switch.
history = []

while True:
    # Record the location only when it actually changed; redrawing the same
    # page (after "help" or an unrecognised command) must not add duplicate
    # entries, or "p" would appear to do nothing.
    location = (channel, page, sub)
    if not history or history[-1] != location:
        history.append(location)

    # Clear the screen, move the cursor home and print the title line.
    print(f'\033[2J\033[Htelnettext {channel}:{page:03d}.{sub} [2022 thp.io]')
    url = f'https://text.orf.at/channel/{channel}/page/{page}/{sub}.html'
    try:
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(url) as response:
            markup = response.read().decode()
    except urllib.error.HTTPError as e:
        print(f'HTTP Error: {e}')
    except urllib.error.URLError as e:
        # Failures below the HTTP level (DNS, refused connection, ...).
        print(f'Network Error: {e}')
    else:
        TeletextHTMLParser().feed(markup)

    if not args.interactive:
        break

    try:
        command = input('([channel:]000[.0]|hjklpq|help)> ')
        if command == 'help':
            print(__doc__)
            input('press any key to continue')
    except EOFError:
        # stdin was closed (Ctrl-D or end of piped input): leave quietly.
        break

    # An empty line or "q" ends the session.
    if command in ('', 'q'):
        break

    if command == 'h':
        sub = max(1, sub - 1)
    elif command == 'j':
        page += 1
    elif command == 'k':
        page -= 1
    elif command == 'l':
        sub += 1
    elif command == 'p':
        if len(history) > 1:
            history.pop()  # drop the current location ...
            channel, page, sub = history.pop()  # ... and return to the one before
    else:
        m = _COMMAND_RE.match(command)
        if m is not None:
            channel = m.group(1) or channel
            page = int(m.group(2) or page)
            if m.group(2) is not None:
                # An explicitly typed page starts at its first subpage
                # unless a subpage was typed as well.
                sub = 1
            sub = int(m.group(3) or sub)

    # Keep the location inside the range 100-999 / subpage >= 1.
    page = max(100, min(999, page))
    sub = max(1, sub)