mysql/tools/scripts/collations.py
Ruben Perez 7c5c7a6cd4 Collations for MySQL and MariaDB are now independent.
The collation enum has been converted into plain integers.
Separate headers for MySQL and MariaDB collation IDs have been added.
A script to generate those headers from table dumps has been added.

Close #120
2023-02-20 02:18:44 +01:00

87 lines
2.9 KiB
Python

#!/usr/bin/python3
#
# Copyright (c) 2019-2023 Ruben Perez Hidalgo (rubenperez038 at gmail dot com)
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#
# This script generates the collation headers from two dumps of the SHOW COLLATION
# statement: one taken from a MySQL server and one taken from a MariaDB server.
# MySQL 5 and 8 are compatible, while MariaDB renamed some collations in a way
# that makes having them in a separate header preferable.
# To create the dumps, run the statement against each server, e.g.:
#   mysql -u root -e "SHOW COLLATION" > private/mysql-collations.txt    (connected to MySQL)
#   mysql -u root -e "SHOW COLLATION" > private/mariadb-collations.txt  (connected to MariaDB)
import pandas as pd
from pathlib import Path
from os import path
from typing import Literal
from subprocess import run
# Repository root: three parents up from the resolved location of this file
# (the script's directory, then two more levels).
REPO_BASE = Path(path.realpath(__file__)).parent.parent.parent

# Input dumps of SHOW COLLATION, one per server flavor, read from <repo>/private.
MYSQL_SHOW_COLLATION = REPO_BASE / 'private' / 'mysql-collations.txt'
MARIADB_SHOW_COLLATION = REPO_BASE / 'private' / 'mariadb-collations.txt'
# str.format template for a single collation constant.
# Placeholders: {collation} (constant name), {flavor} (server flavor), {id} (numeric id).
COLLATIONS_ENTRY_TEMPLATE = (
    '\n'
    '// Identifies the {collation} collation in {flavor} servers.\n'
    'constexpr std::uint16_t {collation} = {id};\n'
)
# str.format template for a whole generated header.
# Placeholders: {flavor} (server flavor) and {entries} (the rendered constants).
# Doubled braces are literal braces in the formatted output.
COLLATIONS_HEADER_TEMPLATE = '\n'.join((
    '',  # the template starts with a newline
    '#ifndef BOOST_MYSQL_{flavor}_COLLATION_IDS_HPP',
    '#define BOOST_MYSQL_{flavor}_COLLATION_IDS_HPP',
    '// This header was generated by collations.py - do not edit directly',
    '#include <cstdint>',
    'namespace boost {{',
    'namespace mysql {{',
    'namespace {flavor}_collations {{',
    '{entries}',
    '}} // namespace {flavor}_collations',
    '}} // namespace mysql',
    '}} // namespace boost',
    '#endif',
    '',  # ... and ends with one
))
def parse_show_collation(fname: Path) -> pd.DataFrame:
    """Load a SHOW COLLATION dump and return its collation names and ids.

    Args:
        fname: Location of a tab-separated dump whose header row includes the
            ``Collation`` and ``Id`` columns.

    Returns:
        A DataFrame with the columns ``collation`` and ``id``, sorted by
        ascending ``id``. Rows without an id are dropped and ids are integers.

    Raises:
        KeyError: If the dump lacks the ``Collation`` or ``Id`` column.
    """
    table = pd.read_table(fname)
    collations = table[['Collation', 'Id']].rename(
        columns={'Collation': 'collation', 'Id': 'id'}
    )
    # pandas reads a literal NULL (or an empty cell) as NaN, which silently turns
    # the whole column into floats. Such rows cannot become a std::uint16_t
    # constant, so discard them and make sure ids render as "45", not "45.0".
    collations = collations.dropna(subset=['id']).astype({'id': 'int64'})
    return collations.sort_values(by='id')
def render_collations_header(flavor: Literal['mysql', 'mariadb'], df_collations: pd.DataFrame) -> str:
    """Build the text of the collation-ids header for one server flavor.

    Args:
        flavor: Server flavor; substituted into the include guard, the
            namespace name and every entry comment.
        df_collations: Table whose rows expose ``collation`` and ``id``
            attributes; one constant is emitted per row, in row order.

    Returns:
        COLLATIONS_HEADER_TEMPLATE filled in with the flavor and with the
        concatenation of one COLLATIONS_ENTRY_TEMPLATE per row.
    """
    rendered = []
    for row in df_collations.itertuples():
        rendered.append(COLLATIONS_ENTRY_TEMPLATE.format(
            flavor=flavor,
            collation=row.collation,
            id=row.id,
        ))
    body = ''.join(rendered)
    return COLLATIONS_HEADER_TEMPLATE.format(entries=body, flavor=flavor)
# Actually perform the generation
def write_headers(df_mysql: pd.DataFrame, df_mariadb: pd.DataFrame) -> None:
    """Write the generated collation headers for both server flavors.

    The files are ``<flavor>_collations.hpp`` under
    ``REPO_BASE/include/boost/mysql`` and are overwritten if they exist.

    Args:
        df_mysql: Collations to emit into ``mysql_collations.hpp``.
        df_mariadb: Collations to emit into ``mariadb_collations.hpp``.

    Raises:
        OSError: If a header file cannot be opened or written.
    """
    include_dir = REPO_BASE.joinpath('include', 'boost', 'mysql')
    for flavor, df in (('mysql', df_mysql), ('mariadb', df_mariadb)):
        # Render before opening the file: opening in write mode truncates it,
        # so a rendering failure would otherwise leave an empty header behind.
        contents = render_collations_header(flavor, df)
        fname = include_dir.joinpath(f'{flavor}_collations.hpp')
        # Explicit encoding keeps the output independent of the machine's locale.
        with open(fname, 'wt', encoding='utf-8') as f:
            f.write(contents)
# We need to run file_headers.py to set copyrights and headers
def invoke_file_headers() -> None:
    """Run ``tools/scripts/file_headers.py`` from the repository as a subprocess.

    The script is launched with the interpreter that is running this module,
    rather than whatever ``python`` happens to be first on PATH.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero status.
        OSError: If the interpreter cannot be started.
    """
    import sys

    script = REPO_BASE.joinpath('tools', 'scripts', 'file_headers.py')
    # sys.executable can be empty in unusual embeddings; fall back to PATH lookup.
    interpreter = sys.executable or 'python'
    # check=True surfaces a failing run instead of silently ignoring it.
    run([interpreter, str(script)], check=True)
def main():
    """Parse both SHOW COLLATION dumps, write the headers, then run file_headers.py."""
    dumps = (MYSQL_SHOW_COLLATION, MARIADB_SHOW_COLLATION)
    # Unpacking consumes the generator in order: the MySQL dump is parsed first.
    mysql_table, mariadb_table = (parse_show_collation(dump) for dump in dumps)
    write_headers(mysql_table, mariadb_table)
    invoke_file_headers()
# Generate the headers only when executed as a script, not when imported.
if __name__ == "__main__":
    main()