Files
Rust-CUDA/scripts/gen_libdevice_json.py
2021-10-17 19:36:45 -04:00

86 lines
2.7 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Takes the pdf file of the libcuda docs and generates a JSON file representing it.
# That json file is then used to generate internal intrinsics as well as intrinsics docs.
# libdevice is 300+ intrinsics, therefore making a script to do this is better for developer
# sanity as well as extensibility for any future versions of libdevice.
import pdfplumber
import os
import re
import json
dirname = os.path.dirname(__file__)
filename = os.path.join(dirname, 'data/libdevice.pdf')
out_filename = os.path.join(dirname, 'data/libdevice.json')
text = ""
with pdfplumber.open(filename) as pdf:
for page in pdf.pages:
text += page.extract_text()
open("scripts/data/libdevice.txt", "w", encoding="utf8").write(text)
# I know this is bad but trust me its much less work than writing a proper parser
regex = r"3\.\d+\.\s(\w+)(?!\.)\nPrototype:\n(.+)\nDescription:\n([\s\S]*?(?=Returns:))Returns:\n([\s\S]*?(?=Library Availability))Library Availability:\n([\s\S]*?(?=(3\.\d+\.)|\Z|www\.nvidia\.com))"
# The raw text includes the page footer which messes up the regex so clean that up before we go on
sanitize_regex = r"www.nvidia.com\nLibdevice User's Guide Part 000 _v8.0 \| \d+Function Reference\n"
text = re.sub(sanitize_regex, "", text)
# renders better in markdown
text = text.replace("\u2023", "-")
# replace more than one space in a row with a single space
text = re.sub(" +", " ", text)
# the text conversion has some issues with the math symbols in the pdf
# it seems to turn x and y into \nx and \ny
text = text.replace("\nx", "x")
text = text.replace("\ny", "y")
# i dont even know
text = text.replace(".x", "x.")
matches = re.finditer(regex, text)
intrinsics = []
type_map = {
"float": "f32",
"double": "f64",
"i8": "i8",
"i16": "i16",
"i32": "i32",
"i64": "i64",
"void": "()",
"i8*": "*mut i8",
"i16*": "*mut i16",
"i32*": "*mut i32",
"i64*": "*mut i64",
"float*": "*mut f32",
"double*": "*mut f64",
}
for match in matches:
sig_txt = match.group(2).strip()
sig = {}
return_ty = type_map[re.search(".*(?= @)", sig_txt).group()]
params = []
for param in re.finditer("(\w+\*?)(?= %) %(\w+)", sig_txt):
params.append(
{
"name": param.group(2).strip(),
"type": type_map[param.group(1).strip()]
}
)
sig["params"] = params
sig["returns"] = return_ty
intrinsics.append(
{
"name": match.group(1).strip(),
"sig": sig,
"description": match.group(3).strip(),
"returns": match.group(4).strip(),
"availability": match.group(5).strip()
}
)
out = open(out_filename, "w", encoding="utf8")
out.write(json.dumps(intrinsics, indent=2))