#!/usr/bin/env python3 # # Scrape GATT UUIDs from Bluetooth SIG page # https://www.bluetooth.com/specifications/assigned-numbers/logical-link-control/ # # Copyright 2019 BlueKitchen GmbH # from lxml import html import datetime import requests import sys import codecs import os import re headers = {'user-agent': 'curl/7.63.0'} program_info = ''' BTstack PSM Scraper Copyright 2019, BlueKitchen GmbH ''' header = ''' /** * bluetooth_psm.h generated from Bluetooth SIG website for BTstack by tool/bluetooth_psm.py * {datetime} */ #ifndef BLUETOOTH_PSM_H #define BLUETOOTH_PSM_H ''' page_info = ''' /** * Assigned numbers from {page} */ ''' trailer = ''' #endif ''' tags = [] def strip_non_ascii(string): stripped = (c for c in string if 0 < ord(c) < 127) return ''.join(stripped) def create_name(psm): # limit to ascii psm = strip_non_ascii(psm) # remove parts in braces p = re.compile('\(.*\)') tag = p.sub('',psm).rstrip().upper() tag = tag.replace('-', '_') return "BLUETOOTH_PSM_" + tag def scrape_page(fout, url): global headers print("Parsing %s" % url) fout.write(page_info.format(page=url.replace('https://',''))) # get from web r = requests.get(url, headers=headers) content = r.text # test: fetch from local file 'index.html' # f = codecs.open("index.html", "r", "utf-8") # content = f.read(); tree = html.fromstring(content) rows = tree.xpath('//table/tbody/tr') for row in rows: children = row.getchildren() psm = children[0].text_content() # abort when second table starts if (psm == '0x0000-0xFFFF'): break id_hex = children[1].text_content().replace(u'\u200b','') fout.write("#define %-80s %s\n" % (create_name(psm), id_hex)) btstack_root = os.path.abspath(os.path.dirname(sys.argv[0]) + '/..') gen_path = btstack_root + '/src/bluetooth_psm.h' print(program_info) with open(gen_path, 'wt') as fout: fout.write(header.format(datetime=str(datetime.datetime.now()))) scrape_page(fout, 'https://www.bluetooth.com/specifications/assigned-numbers/logical-link-control/') fout.write(trailer) print('Scraping successful!\n')