From 2aa67a8881cfcafee40d10a9a25c87936a4569d7 Mon Sep 17 00:00:00 2001 From: Yossi Konstantinovsky Date: Thu, 15 Jul 2021 15:58:57 +0300 Subject: [PATCH 1/7] add test_pyexpat --- Lib/test/test_pyexpat.py | 734 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 734 insertions(+) create mode 100644 Lib/test/test_pyexpat.py diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py new file mode 100644 index 0000000000..b2b4dea060 --- /dev/null +++ b/Lib/test/test_pyexpat.py @@ -0,0 +1,734 @@ +# XXX TypeErrors on calling handlers, or on bad return values from a +# handler, are obscure and unhelpful. + +from io import BytesIO +import os +import platform +import sys +import sysconfig +import unittest +import traceback + +from xml.parsers import expat +from xml.parsers.expat import errors + +from test.support import sortdict + + +class SetAttributeTest(unittest.TestCase): + def setUp(self): + self.parser = expat.ParserCreate(namespace_separator='!') + + def test_buffer_text(self): + self.assertIs(self.parser.buffer_text, False) + for x in 0, 1, 2, 0: + self.parser.buffer_text = x + self.assertIs(self.parser.buffer_text, bool(x)) + + def test_namespace_prefixes(self): + self.assertIs(self.parser.namespace_prefixes, False) + for x in 0, 1, 2, 0: + self.parser.namespace_prefixes = x + self.assertIs(self.parser.namespace_prefixes, bool(x)) + + def test_ordered_attributes(self): + self.assertIs(self.parser.ordered_attributes, False) + for x in 0, 1, 2, 0: + self.parser.ordered_attributes = x + self.assertIs(self.parser.ordered_attributes, bool(x)) + + def test_specified_attributes(self): + self.assertIs(self.parser.specified_attributes, False) + for x in 0, 1, 2, 0: + self.parser.specified_attributes = x + self.assertIs(self.parser.specified_attributes, bool(x)) + + def test_invalid_attributes(self): + with self.assertRaises(AttributeError): + self.parser.returns_unicode = 1 + with self.assertRaises(AttributeError): + self.parser.returns_unicode + + # Issue #25019 + self.assertRaises(TypeError, setattr, self.parser, range(0xF), 0) + self.assertRaises(TypeError, self.parser.__setattr__, range(0xF), 0) + self.assertRaises(TypeError, getattr, self.parser, range(0xF)) + + +data = b'''\ + + + + + + + + + +%unparsed_entity; +]> + + + + Contents of subelements + + +&external_entity; +&skipped_entity; +\xb5 + +''' + + +# Produce UTF-8 output +class ParseTest(unittest.TestCase): + class Outputter: + def __init__(self): + self.out = [] + + def StartElementHandler(self, name, attrs): + self.out.append('Start element: ' + repr(name) + ' ' + + sortdict(attrs)) + + def EndElementHandler(self, name): + self.out.append('End element: ' + repr(name)) + + def CharacterDataHandler(self, data): + data = data.strip() + if data: + self.out.append('Character data: ' + repr(data)) + + def ProcessingInstructionHandler(self, target, data): + self.out.append('PI: ' + repr(target) + ' ' + repr(data)) + + def StartNamespaceDeclHandler(self, prefix, uri): + self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri)) + + def EndNamespaceDeclHandler(self, prefix): + self.out.append('End of NS decl: ' + repr(prefix)) + + def StartCdataSectionHandler(self): + self.out.append('Start of CDATA section') + + def EndCdataSectionHandler(self): + self.out.append('End of CDATA section') + + def CommentHandler(self, text): + self.out.append('Comment: ' + repr(text)) + + def NotationDeclHandler(self, *args): + name, base, sysid, pubid = args + self.out.append('Notation declared: %s' %(args,)) + + def UnparsedEntityDeclHandler(self, *args): + entityName, base, systemId, publicId, notationName = args + self.out.append('Unparsed entity decl: %s' %(args,)) + + def NotStandaloneHandler(self): + self.out.append('Not standalone') + return 1 + + def ExternalEntityRefHandler(self, *args): + context, base, sysId, pubId = args + self.out.append('External entity ref: %s' %(args[1:],)) + return 1 + + def StartDoctypeDeclHandler(self, *args): + self.out.append(('Start doctype', args)) + return 1 + + def EndDoctypeDeclHandler(self): + self.out.append("End doctype") + return 1 + + def EntityDeclHandler(self, *args): + self.out.append(('Entity declaration', args)) + return 1 + + def XmlDeclHandler(self, *args): + self.out.append(('XML declaration', args)) + return 1 + + def ElementDeclHandler(self, *args): + self.out.append(('Element declaration', args)) + return 1 + + def AttlistDeclHandler(self, *args): + self.out.append(('Attribute list declaration', args)) + return 1 + + def SkippedEntityHandler(self, *args): + self.out.append(("Skipped entity", args)) + return 1 + + def DefaultHandler(self, userData): + pass + + def DefaultHandlerExpand(self, userData): + pass + + handler_names = [ + 'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler', + 'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler', + 'NotationDeclHandler', 'StartNamespaceDeclHandler', + 'EndNamespaceDeclHandler', 'CommentHandler', + 'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler', + 'DefaultHandlerExpand', 'NotStandaloneHandler', + 'ExternalEntityRefHandler', 'StartDoctypeDeclHandler', + 'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler', + 'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler', + ] + + def _hookup_callbacks(self, parser, handler): + """ + Set each of the callbacks defined on handler and named in + self.handler_names on the given parser. + """ + for name in self.handler_names: + setattr(parser, name, getattr(handler, name)) + + def _verify_parse_output(self, operations): + expected_operations = [ + ('XML declaration', ('1.0', 'iso-8859-1', 0)), + 'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'', + "Comment: ' comment data '", + "Not standalone", + ("Start doctype", ('quotations', 'quotations.dtd', None, 1)), + ('Element declaration', ('root', (2, 0, None, ()))), + ('Attribute list declaration', ('root', 'attr1', 'CDATA', None, + 1)), + ('Attribute list declaration', ('root', 'attr2', 'CDATA', None, + 0)), + "Notation declared: ('notation', None, 'notation.jpeg', None)", + ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)), + ('Entity declaration', ('external_entity', 0, None, None, + 'entity.file', None, None)), + "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')", + "Not standalone", + "End doctype", + "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}", + "NS decl: 'myns' 'http://www.python.org/namespace'", + "Start element: 'http://www.python.org/namespace!subelement' {}", + "Character data: 'Contents of subelements'", + "End element: 'http://www.python.org/namespace!subelement'", + "End of NS decl: 'myns'", + "Start element: 'sub2' {}", + 'Start of CDATA section', + "Character data: 'contents of CDATA section'", + 'End of CDATA section', + "End element: 'sub2'", + "External entity ref: (None, 'entity.file', None)", + ('Skipped entity', ('skipped_entity', 0)), + "Character data: '\xb5'", + "End element: 'root'", + ] + for operation, expected_operation in zip(operations, expected_operations): + self.assertEqual(operation, expected_operation) + + def test_parse_bytes(self): + out = self.Outputter() + parser = expat.ParserCreate(namespace_separator='!') + self._hookup_callbacks(parser, out) + + parser.Parse(data, True) + + operations = out.out + self._verify_parse_output(operations) + # Issue #6697. + self.assertRaises(AttributeError, getattr, parser, '\uD800') + + def test_parse_str(self): + out = self.Outputter() + parser = expat.ParserCreate(namespace_separator='!') + self._hookup_callbacks(parser, out) + + parser.Parse(data.decode('iso-8859-1'), True) + + operations = out.out + self._verify_parse_output(operations) + + def test_parse_file(self): + # Try parsing a file + out = self.Outputter() + parser = expat.ParserCreate(namespace_separator='!') + self._hookup_callbacks(parser, out) + file = BytesIO(data) + + parser.ParseFile(file) + + operations = out.out + self._verify_parse_output(operations) + + def test_parse_again(self): + parser = expat.ParserCreate() + file = BytesIO(data) + parser.ParseFile(file) + # Issue 6676: ensure a meaningful exception is raised when attempting + # to parse more than one XML document per xmlparser instance, + # a limitation of the Expat library. + with self.assertRaises(expat.error) as cm: + parser.ParseFile(file) + self.assertEqual(expat.ErrorString(cm.exception.code), + expat.errors.XML_ERROR_FINISHED) + +class NamespaceSeparatorTest(unittest.TestCase): + def test_legal(self): + # Tests that make sure we get errors when the namespace_separator value + # is illegal, and that we don't for good values: + expat.ParserCreate() + expat.ParserCreate(namespace_separator=None) + expat.ParserCreate(namespace_separator=' ') + + def test_illegal(self): + try: + expat.ParserCreate(namespace_separator=42) + self.fail() + except TypeError as e: + self.assertEqual(str(e), + "ParserCreate() argument 'namespace_separator' must be str or None, not int") + + try: + expat.ParserCreate(namespace_separator='too long') + self.fail() + except ValueError as e: + self.assertEqual(str(e), + 'namespace_separator must be at most one character, omitted, or None') + + def test_zero_length(self): + # ParserCreate() needs to accept a namespace_separator of zero length + # to satisfy the requirements of RDF applications that are required + # to simply glue together the namespace URI and the localname. Though + # considered a wart of the RDF specifications, it needs to be supported. + # + # See XML-SIG mailing list thread starting with + # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html + # + expat.ParserCreate(namespace_separator='') # too short + + +class InterningTest(unittest.TestCase): + def test(self): + # Test the interning machinery. + p = expat.ParserCreate() + L = [] + def collector(name, *args): + L.append(name) + p.StartElementHandler = collector + p.EndElementHandler = collector + p.Parse(b" ", True) + tag = L[0] + self.assertEqual(len(L), 6) + for entry in L: + # L should have the same string repeated over and over. + self.assertTrue(tag is entry) + + def test_issue9402(self): + # create an ExternalEntityParserCreate with buffer text + class ExternalOutputter: + def __init__(self, parser): + self.parser = parser + self.parser_result = None + + def ExternalEntityRefHandler(self, context, base, sysId, pubId): + external_parser = self.parser.ExternalEntityParserCreate("") + self.parser_result = external_parser.Parse(b"", True) + return 1 + + parser = expat.ParserCreate(namespace_separator='!') + parser.buffer_text = 1 + out = ExternalOutputter(parser) + parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler + parser.Parse(data, True) + self.assertEqual(out.parser_result, 1) + + +class BufferTextTest(unittest.TestCase): + def setUp(self): + self.stuff = [] + self.parser = expat.ParserCreate() + self.parser.buffer_text = 1 + self.parser.CharacterDataHandler = self.CharacterDataHandler + + def check(self, expected, label): + self.assertEqual(self.stuff, expected, + "%s\nstuff = %r\nexpected = %r" + % (label, self.stuff, map(str, expected))) + + def CharacterDataHandler(self, text): + self.stuff.append(text) + + def StartElementHandler(self, name, attrs): + self.stuff.append("<%s>" % name) + bt = attrs.get("buffer-text") + if bt == "yes": + self.parser.buffer_text = 1 + elif bt == "no": + self.parser.buffer_text = 0 + + def EndElementHandler(self, name): + self.stuff.append("" % name) + + def CommentHandler(self, data): + self.stuff.append("" % data) + + def setHandlers(self, handlers=[]): + for name in handlers: + setattr(self.parser, name, getattr(self, name)) + + def test_default_to_disabled(self): + parser = expat.ParserCreate() + self.assertFalse(parser.buffer_text) + + def test_buffering_enabled(self): + # Make sure buffering is turned on + self.assertTrue(self.parser.buffer_text) + self.parser.Parse(b"123", True) + self.assertEqual(self.stuff, ['123'], + "buffered text not properly collapsed") + + def test1(self): + # XXX This test exposes more detail of Expat's text chunking than we + # XXX like, but it tests what we need to concisely. + self.setHandlers(["StartElementHandler"]) + self.parser.Parse(b"12\n34\n5", True) + self.assertEqual(self.stuff, + ["", "1", "", "2", "\n", "3", "", "4\n5"], + "buffering control not reacting as expected") + + def test2(self): + self.parser.Parse(b"1<2> \n 3", True) + self.assertEqual(self.stuff, ["1<2> \n 3"], + "buffered text not properly collapsed") + + def test3(self): + self.setHandlers(["StartElementHandler"]) + self.parser.Parse(b"123", True) + self.assertEqual(self.stuff, ["", "1", "", "2", "", "3"], + "buffered text not properly split") + + def test4(self): + self.setHandlers(["StartElementHandler", "EndElementHandler"]) + self.parser.CharacterDataHandler = None + self.parser.Parse(b"123", True) + self.assertEqual(self.stuff, + ["", "", "", "", "", ""]) + + def test5(self): + self.setHandlers(["StartElementHandler", "EndElementHandler"]) + self.parser.Parse(b"123", True) + self.assertEqual(self.stuff, + ["", "1", "", "", "2", "", "", "3", ""]) + + def test6(self): + self.setHandlers(["CommentHandler", "EndElementHandler", + "StartElementHandler"]) + self.parser.Parse(b"12345 ", True) + self.assertEqual(self.stuff, + ["", "1", "", "", "2", "", "", "345", ""], + "buffered text not properly split") + + def test7(self): + self.setHandlers(["CommentHandler", "EndElementHandler", + "StartElementHandler"]) + self.parser.Parse(b"12345 ", True) + self.assertEqual(self.stuff, + ["", "1", "", "", "2", "", "", "3", + "", "4", "", "5", ""], + "buffered text not properly split") + + +# Test handling of exception from callback: +class HandlerExceptionTest(unittest.TestCase): + def StartElementHandler(self, name, attrs): + raise RuntimeError(name) + + def check_traceback_entry(self, entry, filename, funcname): + self.assertEqual(os.path.basename(entry[0]), filename) + self.assertEqual(entry[2], funcname) + + def test_exception(self): + parser = expat.ParserCreate() + parser.StartElementHandler = self.StartElementHandler + try: + parser.Parse(b"", True) + self.fail() + except RuntimeError as e: + self.assertEqual(e.args[0], 'a', + "Expected RuntimeError for element 'a', but" + \ + " found %r" % e.args[0]) + # Check that the traceback contains the relevant line in pyexpat.c + entries = traceback.extract_tb(e.__traceback__) + self.assertEqual(len(entries), 3) + self.check_traceback_entry(entries[0], + "test_pyexpat.py", "test_exception") + self.check_traceback_entry(entries[1], + "pyexpat.c", "StartElement") + self.check_traceback_entry(entries[2], + "test_pyexpat.py", "StartElementHandler") + if sysconfig.is_python_build() and not (sys.platform == 'win32' and platform.machine() == 'ARM'): + self.assertIn('call_with_frame("StartElement"', entries[1][3]) + + +# Test Current* members: +class PositionTest(unittest.TestCase): + def StartElementHandler(self, name, attrs): + self.check_pos('s') + + def EndElementHandler(self, name): + self.check_pos('e') + + def check_pos(self, event): + pos = (event, + self.parser.CurrentByteIndex, + self.parser.CurrentLineNumber, + self.parser.CurrentColumnNumber) + self.assertTrue(self.upto < len(self.expected_list), + 'too many parser events') + expected = self.expected_list[self.upto] + self.assertEqual(pos, expected, + 'Expected position %s, got position %s' %(pos, expected)) + self.upto += 1 + + def test(self): + self.parser = expat.ParserCreate() + self.parser.StartElementHandler = self.StartElementHandler + self.parser.EndElementHandler = self.EndElementHandler + self.upto = 0 + self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2), + ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)] + + xml = b'\n \n \n \n' + self.parser.Parse(xml, True) + + +class sf1296433Test(unittest.TestCase): + def test_parse_only_xml_data(self): + # http://python.org/sf/1296433 + # + xml = "%s" % ('a' * 1025) + # this one doesn't crash + #xml = "%s" % ('a' * 10000) + + class SpecificException(Exception): + pass + + def handler(text): + raise SpecificException + + parser = expat.ParserCreate() + parser.CharacterDataHandler = handler + + self.assertRaises(Exception, parser.Parse, xml.encode('iso8859')) + +class ChardataBufferTest(unittest.TestCase): + """ + test setting of chardata buffer size + """ + + def test_1025_bytes(self): + self.assertEqual(self.small_buffer_test(1025), 2) + + def test_1000_bytes(self): + self.assertEqual(self.small_buffer_test(1000), 1) + + def test_wrong_size(self): + parser = expat.ParserCreate() + parser.buffer_text = 1 + with self.assertRaises(ValueError): + parser.buffer_size = -1 + with self.assertRaises(ValueError): + parser.buffer_size = 0 + with self.assertRaises((ValueError, OverflowError)): + parser.buffer_size = sys.maxsize + 1 + with self.assertRaises(TypeError): + parser.buffer_size = 512.0 + + def test_unchanged_size(self): + xml1 = b"" + b'a' * 512 + xml2 = b'a'*512 + b'' + parser = expat.ParserCreate() + parser.CharacterDataHandler = self.counting_handler + parser.buffer_size = 512 + parser.buffer_text = 1 + + # Feed 512 bytes of character data: the handler should be called + # once. + self.n = 0 + parser.Parse(xml1) + self.assertEqual(self.n, 1) + + # Reassign to buffer_size, but assign the same size. + parser.buffer_size = parser.buffer_size + self.assertEqual(self.n, 1) + + # Try parsing rest of the document + parser.Parse(xml2) + self.assertEqual(self.n, 2) + + + def test_disabling_buffer(self): + xml1 = b"" + b'a' * 512 + xml2 = b'b' * 1024 + xml3 = b'c' * 1024 + b''; + parser = expat.ParserCreate() + parser.CharacterDataHandler = self.counting_handler + parser.buffer_text = 1 + parser.buffer_size = 1024 + self.assertEqual(parser.buffer_size, 1024) + + # Parse one chunk of XML + self.n = 0 + parser.Parse(xml1, False) + self.assertEqual(parser.buffer_size, 1024) + self.assertEqual(self.n, 1) + + # Turn off buffering and parse the next chunk. + parser.buffer_text = 0 + self.assertFalse(parser.buffer_text) + self.assertEqual(parser.buffer_size, 1024) + for i in range(10): + parser.Parse(xml2, False) + self.assertEqual(self.n, 11) + + parser.buffer_text = 1 + self.assertTrue(parser.buffer_text) + self.assertEqual(parser.buffer_size, 1024) + parser.Parse(xml3, True) + self.assertEqual(self.n, 12) + + def counting_handler(self, text): + self.n += 1 + + def small_buffer_test(self, buffer_len): + xml = b"" + b'a' * buffer_len + b'' + parser = expat.ParserCreate() + parser.CharacterDataHandler = self.counting_handler + parser.buffer_size = 1024 + parser.buffer_text = 1 + + self.n = 0 + parser.Parse(xml) + return self.n + + def test_change_size_1(self): + xml1 = b"" + b'a' * 1024 + xml2 = b'aaa' + b'a' * 1025 + b'' + parser = expat.ParserCreate() + parser.CharacterDataHandler = self.counting_handler + parser.buffer_text = 1 + parser.buffer_size = 1024 + self.assertEqual(parser.buffer_size, 1024) + + self.n = 0 + parser.Parse(xml1, False) + parser.buffer_size *= 2 + self.assertEqual(parser.buffer_size, 2048) + parser.Parse(xml2, True) + self.assertEqual(self.n, 2) + + def test_change_size_2(self): + xml1 = b"a" + b'a' * 1023 + xml2 = b'aaa' + b'a' * 1025 + b'' + parser = expat.ParserCreate() + parser.CharacterDataHandler = self.counting_handler + parser.buffer_text = 1 + parser.buffer_size = 2048 + self.assertEqual(parser.buffer_size, 2048) + + self.n=0 + parser.Parse(xml1, False) + parser.buffer_size = parser.buffer_size // 2 + self.assertEqual(parser.buffer_size, 1024) + parser.Parse(xml2, True) + self.assertEqual(self.n, 4) + +class MalformedInputTest(unittest.TestCase): + def test1(self): + xml = b"\0\r\n" + parser = expat.ParserCreate() + try: + parser.Parse(xml, True) + self.fail() + except expat.ExpatError as e: + self.assertEqual(str(e), 'unclosed token: line 2, column 0') + + def test2(self): + # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE) + xml = b"\r\n" + parser = expat.ParserCreate() + err_pattern = r'XML declaration not well-formed: line 1, column \d+' + with self.assertRaisesRegex(expat.ExpatError, err_pattern): + parser.Parse(xml, True) + +class ErrorMessageTest(unittest.TestCase): + def test_codes(self): + # verify mapping of errors.codes and errors.messages + self.assertEqual(errors.XML_ERROR_SYNTAX, + errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]]) + + def test_expaterror(self): + xml = b'<' + parser = expat.ParserCreate() + try: + parser.Parse(xml, True) + self.fail() + except expat.ExpatError as e: + self.assertEqual(e.code, + errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN]) + + +class ForeignDTDTests(unittest.TestCase): + """ + Tests for the UseForeignDTD method of expat parser objects. + """ + def test_use_foreign_dtd(self): + """ + If UseForeignDTD is passed True and a document without an external + entity reference is parsed, ExternalEntityRefHandler is first called + with None for the public and system ids. + """ + handler_call_args = [] + def resolve_entity(context, base, system_id, public_id): + handler_call_args.append((public_id, system_id)) + return 1 + + parser = expat.ParserCreate() + parser.UseForeignDTD(True) + parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) + parser.ExternalEntityRefHandler = resolve_entity + parser.Parse(b"") + self.assertEqual(handler_call_args, [(None, None)]) + + # test UseForeignDTD() is equal to UseForeignDTD(True) + handler_call_args[:] = [] + + parser = expat.ParserCreate() + parser.UseForeignDTD() + parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) + parser.ExternalEntityRefHandler = resolve_entity + parser.Parse(b"") + self.assertEqual(handler_call_args, [(None, None)]) + + def test_ignore_use_foreign_dtd(self): + """ + If UseForeignDTD is passed True and a document with an external + entity reference is parsed, ExternalEntityRefHandler is called with + the public and system ids from the document. + """ + handler_call_args = [] + def resolve_entity(context, base, system_id, public_id): + handler_call_args.append((public_id, system_id)) + return 1 + + parser = expat.ParserCreate() + parser.UseForeignDTD(True) + parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) + parser.ExternalEntityRefHandler = resolve_entity + parser.Parse( + b"") + self.assertEqual(handler_call_args, [("bar", "baz")]) + + +if __name__ == "__main__": + unittest.main() From d382009a5fa52d2b3d2a8f852a4a7df01b94148c Mon Sep 17 00:00:00 2001 From: Yossi Konstantinovsky Date: Thu, 15 Jul 2021 14:15:09 +0300 Subject: [PATCH 2/7] add initial pyexpat implementation --- Cargo.lock | 7 ++ Lib/plistlib.py | 3 +- vm/Cargo.toml | 1 + vm/src/stdlib/mod.rs | 2 + vm/src/stdlib/pyexpat.rs | 205 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 216 insertions(+), 2 deletions(-) create mode 100644 vm/src/stdlib/pyexpat.rs diff --git a/Cargo.lock b/Cargo.lock index 26a0c90e3f..75980386cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2112,6 +2112,7 @@ dependencies = [ "widestring", "winapi", "winreg", + "xml-rs", ] [[package]] @@ -2960,6 +2961,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "xml-rs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a" + [[package]] name = "yaml-rust" version = "0.4.5" diff --git a/Lib/plistlib.py b/Lib/plistlib.py index 282d7af6b2..3f8263b922 100644 --- a/Lib/plistlib.py +++ b/Lib/plistlib.py @@ -62,8 +62,7 @@ import os import re import struct from warnings import warn -# XXX RUSTPYTHON TODO: pyexpat -# from xml.parsers.expat import ParserCreate +from xml.parsers.expat import ParserCreate PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__) diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 1e4983e243..ee83595950 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -34,6 +34,7 @@ sha-1 = "0.9.4" sha2 = "0.9.3" sha3 = "0.9.1" blake2 = "0.9.1" +xml-rs = "0.8" num-complex = { version = "0.4.0", features = ["serde"] } num-bigint = { version = "0.4.0", features = ["serde"] } diff --git a/vm/src/stdlib/mod.rs b/vm/src/stdlib/mod.rs index 46a70298d5..8f4cb7d92c 100644 --- a/vm/src/stdlib/mod.rs +++ b/vm/src/stdlib/mod.rs @@ -26,6 +26,7 @@ mod marshal; mod math; mod operator; mod platform; +mod pyexpat; pub(crate) mod pystruct; mod random; // TODO: maybe make this an extension module, if we ever get those @@ -119,6 +120,7 @@ pub fn get_module_inits() -> StdlibMap { "marshal" => marshal::make_module, "math" => math::make_module, "_operator" => operator::make_module, + "pyexpat" => pyexpat::make_module, "_platform" => platform::make_module, "_random" => random::make_module, "_sre" => sre::make_module, diff --git a/vm/src/stdlib/pyexpat.rs b/vm/src/stdlib/pyexpat.rs new file mode 100644 index 0000000000..3f24ad1036 --- /dev/null +++ b/vm/src/stdlib/pyexpat.rs @@ -0,0 +1,205 @@ +/* Pyexpat builtin module +* +* +*/ + +use crate::vm::VirtualMachine; +use crate::PyObjectRef; + +pub fn make_module(vm: &VirtualMachine) -> PyObjectRef { + let module = _pyexpat::make_module(vm); + + extend_module!(vm, module, { + "errors" => _errors::make_module(vm), + "model" => _model::make_module(vm) + }); + + module +} + +#[pymodule(name = "pyexpat")] +mod _pyexpat { + use crate::builtins::{PyStr, PyStrRef, PyTypeRef}; + use crate::byteslike::PyBytesLike; + use crate::function::{IntoFuncArgs, OptionalArg}; + use crate::pyobject::StaticType; + use crate::{ + IntoPyObject, IntoPyRef, ItemProtocol, PyObjectRef, PyRef, PyResult, PyValue, + TryFromObject, VirtualMachine, + }; + + use rustpython_common::lock::PyRwLock; + use std::io::Cursor; + use xml::reader::XmlEvent; + type MutableObject = PyRwLock; + + #[pyattr] + #[pyclass(name = "xmlparser", module = false)] + #[derive(Debug)] + struct PyExpatLikeXmlParser { + start_element: MutableObject, + end_element: MutableObject, + character_data: MutableObject, + entity_decl: MutableObject, + } + type PyExpatLikeXmlParserRef = PyRef; + + impl PyValue for PyExpatLikeXmlParser { + fn class(_vm: &VirtualMachine) -> &PyTypeRef { + Self::static_type() + } + } + + #[inline] + fn invoke_handler(vm: &VirtualMachine, handler: &MutableObject, args: T) + where + T: IntoFuncArgs, + { + vm.invoke(&handler.read().clone(), args).ok(); + } + + #[pyimpl] + impl PyExpatLikeXmlParser { + fn new(vm: &VirtualMachine) -> PyResult { + Ok(PyExpatLikeXmlParser { + start_element: MutableObject::new(vm.ctx.none()), + end_element: MutableObject::new(vm.ctx.none()), + character_data: MutableObject::new(vm.ctx.none()), + entity_decl: MutableObject::new(vm.ctx.none()), + } + .into_ref(vm)) + } + + #[pyproperty(name = "StartElementHandler")] + fn start_element_handler(&self) -> PyObjectRef { + self.start_element.read().clone() + } + + #[pyproperty(setter, name = "StartElementHandler")] + fn set_start_element_handler(&self, func: PyObjectRef) { + let mut handler = self.start_element.write(); + *handler = func; + } + + #[pyproperty(name = "EndElementHandler")] + fn end_element_handler(&self) -> PyObjectRef { + self.end_element.read().clone() + } + + #[pyproperty(setter, name = "EndElementHandler")] + fn set_end_element_handler(&self, func: PyObjectRef) { + let mut handler = self.end_element.write(); + *handler = func; + } + + #[pyproperty(name = "CharacterDataHandler")] + fn character_data_handler(&self) -> PyObjectRef { + self.character_data.read().clone() + } + + #[pyproperty(setter, name = "CharacterDataHandler")] + fn set_character_data_handler(&self, func: PyObjectRef) { + let mut handler = self.character_data.write(); + *handler = func; + } + + #[pyproperty(name = "EntityDeclHandler")] + fn entity_decl(&self) -> PyObjectRef { + self.entity_decl.read().clone() + } + + #[pyproperty(setter, name = "EntityDeclHandler")] + fn set_decl_handler(&self, func: PyObjectRef) { + let mut handler = self.entity_decl.write(); + *handler = func; + } + + fn create_config(&self) -> xml::ParserConfig { + xml::ParserConfig::new() + .cdata_to_characters(true) + .coalesce_characters(false) + .whitespace_to_characters(true) + } + + fn do_parse(&self, vm: &VirtualMachine, parser: xml::EventReader) + where + T: std::io::Read, + { + for e in parser { + match e { + Ok(XmlEvent::StartElement { + name, attributes, .. + }) => { + let dict = vm.ctx.new_dict(); + for attribute in attributes { + dict.set_item( + attribute.name.local_name.as_str(), + vm.ctx.new_str(attribute.value), + vm, + ) + .unwrap(); + } + + let name_str = PyStr::from(name.local_name).into_ref(vm); + invoke_handler(vm, &self.start_element, (name_str, dict.into_object())); + } + Ok(XmlEvent::EndElement { name, .. }) => { + let name_str = PyStr::from(name.local_name).into_ref(vm); + invoke_handler(vm, &self.end_element, (name_str,)); + } + Ok(XmlEvent::Characters(chars)) => { + let str = PyStr::from(chars).into_ref(vm); + invoke_handler(vm, &self.character_data, (str,)); + } + _ => {} + } + } + } + + #[pymethod(name = "Parse")] + fn parse(&self, data: PyStrRef, _isfinal: OptionalArg, vm: &VirtualMachine) { + let reader = Cursor::>::new(data.as_str().as_bytes().to_vec()); + let parser = self.create_config().create_reader(reader); + self.do_parse(vm, parser); + } + + #[pymethod(name = "ParseFile")] + fn parse_file(&self, file: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + // todo: read chunks at a time + let read_res = vm.call_method(&file, "read", ())?; + let bytes_like = PyBytesLike::try_from_object(vm, read_res)?; + let buf = bytes_like.borrow_buf().to_vec(); + let reader = Cursor::new(buf); + let parser = self.create_config().create_reader(reader); + self.do_parse(vm, parser); + + // todo: return value + Ok(()) + } + } + + #[derive(FromArgs)] + #[allow(dead_code)] + struct ParserCreateArgs { + #[pyarg(any, optional)] + encoding: OptionalArg, + #[pyarg(any, optional)] + namespace_separator: OptionalArg, + #[pyarg(any, optional)] + intern: OptionalArg, + } + + #[pyfunction(name = "ParserCreate")] + fn parser_create( + _args: ParserCreateArgs, + vm: &VirtualMachine, + ) -> PyResult { + PyExpatLikeXmlParser::new(vm) + } +} + +#[pymodule(name = "model")] +mod _model {} + +#[pymodule(name = "errors")] +mod _errors {} From ec90a2267aca00560eae70da40533379e0b6eb38 Mon Sep 17 00:00:00 2001 From: Yossi Konstantinovsky Date: Sat, 17 Jul 2021 11:42:27 +0300 Subject: [PATCH 3/7] pyexpat: mark tests as expected failure --- Lib/test/test_pyexpat.py | 84 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index b2b4dea060..6e34f369d6 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -19,30 +19,40 @@ class SetAttributeTest(unittest.TestCase): def setUp(self): self.parser = expat.ParserCreate(namespace_separator='!') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_buffer_text(self): self.assertIs(self.parser.buffer_text, False) for x in 0, 1, 2, 0: self.parser.buffer_text = x self.assertIs(self.parser.buffer_text, bool(x)) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_namespace_prefixes(self): self.assertIs(self.parser.namespace_prefixes, False) for x in 0, 1, 2, 0: self.parser.namespace_prefixes = x self.assertIs(self.parser.namespace_prefixes, bool(x)) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_ordered_attributes(self): self.assertIs(self.parser.ordered_attributes, False) for x in 0, 1, 2, 0: self.parser.ordered_attributes = x self.assertIs(self.parser.ordered_attributes, bool(x)) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_specified_attributes(self): self.assertIs(self.parser.specified_attributes, False) for x in 0, 1, 2, 0: self.parser.specified_attributes = x self.assertIs(self.parser.specified_attributes, bool(x)) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_invalid_attributes(self): with self.assertRaises(AttributeError): self.parser.returns_unicode = 1 @@ -226,6 +236,8 @@ class ParseTest(unittest.TestCase): for operation, expected_operation in zip(operations, expected_operations): self.assertEqual(operation, expected_operation) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_parse_bytes(self): out = self.Outputter() parser = expat.ParserCreate(namespace_separator='!') @@ -238,6 +250,8 @@ class ParseTest(unittest.TestCase): # Issue #6697. self.assertRaises(AttributeError, getattr, parser, '\uD800') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_parse_str(self): out = self.Outputter() parser = expat.ParserCreate(namespace_separator='!') @@ -248,6 +262,8 @@ class ParseTest(unittest.TestCase): operations = out.out self._verify_parse_output(operations) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_parse_file(self): # Try parsing a file out = self.Outputter() @@ -260,6 +276,8 @@ class ParseTest(unittest.TestCase): operations = out.out self._verify_parse_output(operations) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_parse_again(self): parser = expat.ParserCreate() file = BytesIO(data) @@ -273,6 +291,8 @@ class ParseTest(unittest.TestCase): expat.errors.XML_ERROR_FINISHED) class NamespaceSeparatorTest(unittest.TestCase): + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_legal(self): # Tests that make sure we get errors when the namespace_separator value # is illegal, and that we don't for good values: @@ -280,6 +300,8 @@ class NamespaceSeparatorTest(unittest.TestCase): expat.ParserCreate(namespace_separator=None) expat.ParserCreate(namespace_separator=' ') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_illegal(self): try: expat.ParserCreate(namespace_separator=42) @@ -295,6 +317,8 @@ class NamespaceSeparatorTest(unittest.TestCase): self.assertEqual(str(e), 'namespace_separator must be at most one character, omitted, or None') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_zero_length(self): # ParserCreate() needs to accept a namespace_separator of zero length # to satisfy the requirements of RDF applications that are required @@ -308,6 +332,9 @@ class NamespaceSeparatorTest(unittest.TestCase): class InterningTest(unittest.TestCase): + + # TODO: RUSTPYTHON + @unittest.expectedFailure def test(self): # Test the interning machinery. p = expat.ParserCreate() @@ -323,6 +350,8 @@ class InterningTest(unittest.TestCase): # L should have the same string repeated over and over. self.assertTrue(tag is entry) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_issue9402(self): # create an ExternalEntityParserCreate with buffer text class ExternalOutputter: @@ -376,10 +405,14 @@ class BufferTextTest(unittest.TestCase): for name in handlers: setattr(self.parser, name, getattr(self, name)) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_default_to_disabled(self): parser = expat.ParserCreate() self.assertFalse(parser.buffer_text) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_buffering_enabled(self): # Make sure buffering is turned on self.assertTrue(self.parser.buffer_text) @@ -387,6 +420,8 @@ class BufferTextTest(unittest.TestCase): self.assertEqual(self.stuff, ['123'], "buffered text not properly collapsed") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test1(self): # XXX This test exposes more detail of Expat's text chunking than we # XXX like, but it tests what we need to concisely. @@ -396,17 +431,23 @@ class BufferTextTest(unittest.TestCase): ["", "1", "", "2", "\n", "3", "", "4\n5"], "buffering control not reacting as expected") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test2(self): self.parser.Parse(b"1<2> \n 3", True) self.assertEqual(self.stuff, ["1<2> \n 3"], "buffered text not properly collapsed") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test3(self): self.setHandlers(["StartElementHandler"]) self.parser.Parse(b"123", True) self.assertEqual(self.stuff, ["", "1", "", "2", "", "3"], "buffered text not properly split") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test4(self): self.setHandlers(["StartElementHandler", "EndElementHandler"]) self.parser.CharacterDataHandler = None @@ -414,12 +455,16 @@ class BufferTextTest(unittest.TestCase): self.assertEqual(self.stuff, ["", "", "", "", "", ""]) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test5(self): self.setHandlers(["StartElementHandler", "EndElementHandler"]) self.parser.Parse(b"123", True) self.assertEqual(self.stuff, ["", "1", "", "", "2", "", "", "3", ""]) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test6(self): self.setHandlers(["CommentHandler", "EndElementHandler", "StartElementHandler"]) @@ -428,6 +473,8 @@ class BufferTextTest(unittest.TestCase): ["", "1", "", "", "2", "", "", "345", ""], "buffered text not properly split") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test7(self): self.setHandlers(["CommentHandler", "EndElementHandler", "StartElementHandler"]) @@ -447,6 +494,8 @@ class HandlerExceptionTest(unittest.TestCase): self.assertEqual(os.path.basename(entry[0]), filename) self.assertEqual(entry[2], funcname) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_exception(self): parser = expat.ParserCreate() parser.StartElementHandler = self.StartElementHandler @@ -490,6 +539,8 @@ class PositionTest(unittest.TestCase): 'Expected position %s, got position %s' %(pos, expected)) self.upto += 1 + # TODO: RUSTPYTHON + @unittest.expectedFailure def test(self): self.parser = expat.ParserCreate() self.parser.StartElementHandler = self.StartElementHandler @@ -503,6 +554,9 @@ class PositionTest(unittest.TestCase): class sf1296433Test(unittest.TestCase): + + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_parse_only_xml_data(self): # http://python.org/sf/1296433 # @@ -526,12 +580,18 @@ class ChardataBufferTest(unittest.TestCase): test setting of chardata buffer size """ + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_1025_bytes(self): self.assertEqual(self.small_buffer_test(1025), 2) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_1000_bytes(self): self.assertEqual(self.small_buffer_test(1000), 1) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_wrong_size(self): parser = expat.ParserCreate() parser.buffer_text = 1 @@ -544,6 +604,8 @@ class ChardataBufferTest(unittest.TestCase): with self.assertRaises(TypeError): parser.buffer_size = 512.0 + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_unchanged_size(self): xml1 = b"" + b'a' * 512 xml2 = b'a'*512 + b'' @@ -566,7 +628,8 @@ class ChardataBufferTest(unittest.TestCase): parser.Parse(xml2) self.assertEqual(self.n, 2) - + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_disabling_buffer(self): xml1 = b"" + b'a' * 512 xml2 = b'b' * 1024 @@ -611,6 +674,8 @@ class ChardataBufferTest(unittest.TestCase): parser.Parse(xml) return self.n + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_change_size_1(self): xml1 = b"" + b'a' * 1024 xml2 = b'aaa' + b'a' * 1025 + b'' @@ -627,6 +692,8 @@ class ChardataBufferTest(unittest.TestCase): parser.Parse(xml2, True) self.assertEqual(self.n, 2) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_change_size_2(self): xml1 = b"a" + b'a' * 1023 xml2 = b'aaa' + b'a' * 1025 + b'' @@ -644,6 +711,9 @@ class ChardataBufferTest(unittest.TestCase): self.assertEqual(self.n, 4) class MalformedInputTest(unittest.TestCase): + + # TODO: RUSTPYTHON + @unittest.expectedFailure def test1(self): xml = b"\0\r\n" parser = expat.ParserCreate() @@ -653,6 +723,8 @@ class MalformedInputTest(unittest.TestCase): except expat.ExpatError as e: self.assertEqual(str(e), 'unclosed token: line 2, column 0') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test2(self): # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE) xml = b"\r\n" @@ -662,11 +734,16 @@ class MalformedInputTest(unittest.TestCase): parser.Parse(xml, True) class ErrorMessageTest(unittest.TestCase): + + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_codes(self): # verify mapping of errors.codes and errors.messages self.assertEqual(errors.XML_ERROR_SYNTAX, errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]]) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_expaterror(self): xml = b'<' parser = expat.ParserCreate() @@ -682,6 +759,9 @@ class ForeignDTDTests(unittest.TestCase): """ Tests for the UseForeignDTD method of expat parser objects. """ + + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_use_foreign_dtd(self): """ If UseForeignDTD is passed True and a document without an external @@ -710,6 +790,8 @@ class ForeignDTDTests(unittest.TestCase): parser.Parse(b"") self.assertEqual(handler_call_args, [(None, None)]) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_ignore_use_foreign_dtd(self): """ If UseForeignDTD is passed True and a document with an external From e61a583223621adb1ecd5538cc5d168e9585f23b Mon Sep 17 00:00:00 2001 From: Yossi Konstantinovsky Date: Sat, 17 Jul 2021 20:07:14 +0300 Subject: [PATCH 4/7] pyexpat: refactor getters and setters for handlers --- vm/src/stdlib/pyexpat.rs | 70 ++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 46 deletions(-) diff --git a/vm/src/stdlib/pyexpat.rs b/vm/src/stdlib/pyexpat.rs index 3f24ad1036..8c04de9a56 100644 --- a/vm/src/stdlib/pyexpat.rs +++ b/vm/src/stdlib/pyexpat.rs @@ -10,13 +10,25 @@ pub fn make_module(vm: &VirtualMachine) -> PyObjectRef { let module = _pyexpat::make_module(vm); extend_module!(vm, module, { - "errors" => _errors::make_module(vm), - "model" => _model::make_module(vm) + "errors" => _errors::make_module(vm), + "model" => _model::make_module(vm), }); module } +macro_rules! create_property { + ($ctx: expr, $attributes: expr, $name: expr, $element: ident) => { + let attr = $ctx.new_getset( + $name, + move |this: &PyExpatLikeXmlParser| this.$element.read().clone(), + move |this: &PyExpatLikeXmlParser, func: PyObjectRef| *this.$element.write() = func, + ); + + $attributes.insert($name.to_owned(), attr); + }; +} + #[pymodule(name = "pyexpat")] mod _pyexpat { use crate::builtins::{PyStr, PyStrRef, PyTypeRef}; @@ -24,8 +36,8 @@ mod _pyexpat { use crate::function::{IntoFuncArgs, OptionalArg}; use crate::pyobject::StaticType; use crate::{ - IntoPyObject, IntoPyRef, ItemProtocol, PyObjectRef, PyRef, PyResult, PyValue, - TryFromObject, VirtualMachine, + ItemProtocol, PyContext, PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, + VirtualMachine, }; use rustpython_common::lock::PyRwLock; @@ -36,7 +48,7 @@ mod _pyexpat { #[pyattr] #[pyclass(name = "xmlparser", module = false)] #[derive(Debug)] - struct PyExpatLikeXmlParser { + pub struct PyExpatLikeXmlParser { start_element: MutableObject, end_element: MutableObject, character_data: MutableObject, @@ -70,48 +82,14 @@ mod _pyexpat { .into_ref(vm)) } - #[pyproperty(name = "StartElementHandler")] - fn start_element_handler(&self) -> PyObjectRef { - self.start_element.read().clone() - } + #[extend_class] + fn extend_class_with_fields(ctx: &PyContext, class: &PyTypeRef) { + let mut attributes = class.attributes.write(); - #[pyproperty(setter, name = "StartElementHandler")] - fn set_start_element_handler(&self, func: PyObjectRef) { - let mut handler = self.start_element.write(); - *handler = func; - } - - #[pyproperty(name = "EndElementHandler")] - fn end_element_handler(&self) -> PyObjectRef { - self.end_element.read().clone() - } - - #[pyproperty(setter, name = "EndElementHandler")] - fn set_end_element_handler(&self, func: PyObjectRef) { - let mut handler = self.end_element.write(); - *handler = func; - } - - #[pyproperty(name = "CharacterDataHandler")] - fn character_data_handler(&self) -> PyObjectRef { - self.character_data.read().clone() - } - - #[pyproperty(setter, name = "CharacterDataHandler")] - fn set_character_data_handler(&self, func: PyObjectRef) { - let mut handler = self.character_data.write(); - *handler = func; - } - - #[pyproperty(name = "EntityDeclHandler")] - fn entity_decl(&self) -> PyObjectRef { - self.entity_decl.read().clone() - } - - #[pyproperty(setter, name = "EntityDeclHandler")] - fn set_decl_handler(&self, func: PyObjectRef) { - let mut handler = self.entity_decl.write(); - *handler = func; + create_property!(ctx, attributes, "StartElementHandler", start_element); + create_property!(ctx, attributes, "EndElementHandler", end_element); + create_property!(ctx, attributes, "CharacterDataHandler", character_data); + create_property!(ctx, attributes, "EntityDeclHandler", entity_decl); } fn create_config(&self) -> xml::ParserConfig { From e1d8d47c7d07a092f824c05153443649c72bba6f Mon Sep 17 00:00:00 2001 From: Yossi Konstantinovsky Date: Mon, 19 Jul 2021 21:52:06 +0300 Subject: [PATCH 5/7] pyexpat: add missing property to make tests run --- vm/src/stdlib/pyexpat.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vm/src/stdlib/pyexpat.rs b/vm/src/stdlib/pyexpat.rs index 8c04de9a56..6a3893d163 100644 --- a/vm/src/stdlib/pyexpat.rs +++ b/vm/src/stdlib/pyexpat.rs @@ -53,6 +53,7 @@ mod _pyexpat { end_element: MutableObject, character_data: MutableObject, entity_decl: MutableObject, + buffer_text: MutableObject, } type PyExpatLikeXmlParserRef = PyRef; @@ -78,6 +79,7 @@ mod _pyexpat { end_element: MutableObject::new(vm.ctx.none()), character_data: MutableObject::new(vm.ctx.none()), entity_decl: MutableObject::new(vm.ctx.none()), + buffer_text: MutableObject::new(vm.ctx.new_bool(false)), } .into_ref(vm)) } @@ -90,6 +92,7 @@ mod _pyexpat { create_property!(ctx, attributes, "EndElementHandler", end_element); create_property!(ctx, attributes, "CharacterDataHandler", character_data); create_property!(ctx, attributes, "EntityDeclHandler", entity_decl); + create_property!(ctx, attributes, "buffer_text", buffer_text); } fn create_config(&self) -> xml::ParserConfig { From 570a3893e7c7849c3bc66c1b4a57fb6bbd8a2252 Mon Sep 17 00:00:00 2001 From: Yossi Konstantinovsky Date: Mon, 19 Jul 2021 22:02:14 +0300 Subject: [PATCH 6/7] pyexpat: remove expected failure from passing tests --- Lib/test/test_pyexpat.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 6e34f369d6..33e1ffb836 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -51,8 +51,6 @@ class SetAttributeTest(unittest.TestCase): self.parser.specified_attributes = x self.assertIs(self.parser.specified_attributes, bool(x)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_invalid_attributes(self): with self.assertRaises(AttributeError): self.parser.returns_unicode = 1 @@ -317,8 +315,6 @@ class NamespaceSeparatorTest(unittest.TestCase): self.assertEqual(str(e), 'namespace_separator must be at most one character, omitted, or None') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_zero_length(self): # ParserCreate() needs to accept a namespace_separator of zero length # to satisfy the requirements of RDF applications that are required @@ -405,8 +401,6 @@ class BufferTextTest(unittest.TestCase): for name in handlers: setattr(self.parser, name, getattr(self, name)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_default_to_disabled(self): parser = expat.ParserCreate() self.assertFalse(parser.buffer_text) @@ -555,8 +549,6 @@ class PositionTest(unittest.TestCase): class sf1296433Test(unittest.TestCase): - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_parse_only_xml_data(self): # http://python.org/sf/1296433 # From f35f6ad18c7f5abcfc12bd0b1dcfae7f8d4097f2 Mon Sep 17 00:00:00 2001 From: Yossi Konstantinovsky Date: Tue, 20 Jul 2021 14:01:03 +0300 Subject: [PATCH 7/7] tests: fix test_mac_ver on M1 macs --- Lib/test/test_platform.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Lib/test/test_platform.py b/Lib/test/test_platform.py index 96676aab55..7a5146216d 100644 --- a/Lib/test/test_platform.py +++ b/Lib/test/test_platform.py @@ -248,15 +248,11 @@ class PlatformTest(unittest.TestCase): self.assertEqual(res[1], ('', '', '')) if sys.byteorder == 'little': - self.assertIn(res[2], ('i386', 'x86_64')) + self.assertIn(res[2], ('i386', 'x86_64', 'arm64')) else: self.assertEqual(res[2], 'PowerPC') - # TODO: RUSTPYTHON - if sys.platform == "darwin": - test_mac_ver = unittest.expectedFailure(test_mac_ver) - # TODO: RUSTPYTHON @unittest.expectedFailure @unittest.skipUnless(sys.platform == 'darwin', "OSX only test")