mirror of
https://github.com/Rust-GPU/Rust-CUDA.git
synced 2026-06-01 05:39:48 +09:00
86 lines
2.7 KiB
Python
86 lines
2.7 KiB
Python
# Takes the PDF file of the libdevice docs and generates a JSON file representing it.
# That JSON file is then used to generate internal intrinsics as well as intrinsics docs.
# libdevice has 300+ intrinsics, so having a script do this is better for developer
# sanity as well as extensibility for any future versions of libdevice.
|
||
|
||
import pdfplumber
|
||
import os
|
||
import re
|
||
import json
|
||
|
||
# Resolve every data file against this script's own directory.
dirname = os.path.dirname(__file__)
filename = os.path.join(dirname, 'data/libdevice.pdf')
out_filename = os.path.join(dirname, 'data/libdevice.json')

# Concatenate the text of every page with no separator between pages.
# Some pdfplumber versions return None for a page with no extractable text,
# so fall back to "" instead of failing on `str + None`.
with pdfplumber.open(filename) as pdf:
    text = "".join(page.extract_text() or "" for page in pdf.pages)

# Dump the raw, unsanitized text next to the other data files. It is built
# from `dirname` like the paths above so the script no longer depends on the
# current working directory, and the file is closed deterministically.
with open(os.path.join(dirname, 'data/libdevice.txt'), "w", encoding="utf8") as raw_dump:
    raw_dump.write(text)
|
||
|
||
# I know this is bad but trust me its much less work than writing a proper parser.
# One match per documented intrinsic. Capture groups as consumed further down:
#   1 = intrinsic name, 2 = prototype line, 3 = description text,
#   4 = "Returns" text, 5 = "Library Availability" text.
regex = r"3\.\d+\.\s(\w+)(?!\.)\nPrototype:\n(.+)\nDescription:\n([\s\S]*?(?=Returns:))Returns:\n([\s\S]*?(?=Library Availability))Library Availability:\n([\s\S]*?(?=(3\.\d+\.)|\Z|www\.nvidia\.com))"

# The raw text includes the page footer which messes up the regex, so clean
# that up before we go on. The dots are escaped so they only match literal
# dots (an unescaped `.` matches any character).
sanitize_regex = r"www\.nvidia\.com\nLibdevice User's Guide Part 000 _v8\.0 \| \d+Function Reference\n"

text = re.sub(sanitize_regex, "", text)
# Swap the triangular bullet (U+2023) for a dash; renders better in markdown.
text = text.replace("\u2023", "-")
# Replace more than one space in a row with a single space.
text = re.sub(" +", " ", text)
# The text conversion has some issues with the math symbols in the pdf:
# it seems to turn x and y into \nx and \ny, so glue them back onto the line.
text = text.replace("\nx", "x")
text = text.replace("\ny", "y")
# Original author's fixup, kept as-is: moves a period sitting directly in
# front of an "x" to after it (".x" -> "x."). Presumably another artifact of
# the math-symbol extraction -- TODO confirm against the PDF.
text = text.replace(".x", "x.")

# Lazy iterator over every intrinsic entry found in the sanitized text.
matches = re.finditer(regex, text)
# Filled with one dict per intrinsic by the parsing loop below.
intrinsics = []
|
||
# Maps the type names that appear in the PDF prototypes to the Rust type
# spelling stored in the generated JSON. Built from small tables instead of
# one long literal; the resulting keys, values and insertion order are the
# same as the hand-written mapping.
_FLOAT_TYPES = (("float", "f32"), ("double", "f64"))
_INT_TYPES = ("i8", "i16", "i32", "i64")

type_map = {}
# Scalar types: floats are renamed, integers keep their name.
type_map.update(_FLOAT_TYPES)
type_map.update((int_ty, int_ty) for int_ty in _INT_TYPES)
type_map["void"] = "()"
# Pointer types: every `T*` becomes a `*mut` pointer to the mapped scalar.
type_map.update((int_ty + "*", "*mut " + int_ty) for int_ty in _INT_TYPES)
type_map.update((src_ty + "*", "*mut " + rust_ty) for src_ty, rust_ty in _FLOAT_TYPES)
|
||
|
||
# Patterns for picking a prototype line apart, compiled once outside the loop.
# Raw strings keep `\w` and `\*` from being read as (invalid) string escapes,
# which newer Python versions warn about.
_RETURN_TY_RE = re.compile(r".*(?= @)")  # everything before the last " @"
_PARAM_RE = re.compile(r"(\w+\*?)(?= %) %(\w+)")  # "<type> %<name>" pairs

# Turn every regex match into one JSON-serializable intrinsic record.
for match in matches:
    sig_txt = match.group(2).strip()

    # Raises AttributeError if the prototype has no " @" and KeyError if a
    # type is missing from `type_map`, same as before.
    return_ty = type_map[_RETURN_TY_RE.search(sig_txt).group()]
    params = [
        {
            "name": param.group(2).strip(),
            "type": type_map[param.group(1).strip()],
        }
        for param in _PARAM_RE.finditer(sig_txt)
    ]
    sig = {"params": params, "returns": return_ty}

    intrinsics.append(
        {
            "name": match.group(1).strip(),
            "sig": sig,
            "description": match.group(3).strip(),
            "returns": match.group(4).strip(),
            "availability": match.group(5).strip(),
        }
    )
|
||
|
||
# Write the collected intrinsics as JSON indented by two spaces. The context
# manager closes (and therefore flushes) the file even if serialization
# raises, instead of leaving the handle open until interpreter exit.
with open(out_filename, "w", encoding="utf8") as out:
    out.write(json.dumps(intrinsics, indent=2))
|