diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py new file mode 100644 index 000000000..46249e513 --- /dev/null +++ b/Lib/test/test_minidom.py @@ -0,0 +1,1788 @@ +# test for xml.dom.minidom + +import copy +import pickle +import io +from test import support +import unittest + +import xml.dom.minidom + +from xml.dom.minidom import parse, Attr, Node, Document, Element, parseString +from xml.dom.minidom import getDOMImplementation +from xml.parsers.expat import ExpatError + + +tstfile = support.findfile("test.xml", subdir="xmltestdata") +sample = ("\n" + "\n" + " \n" + "]> text\n" + " ") + +# The tests of DocumentType importing use these helpers to construct +# the documents to work with, since not all DOM builders actually +# create the DocumentType nodes. +def create_doc_without_doctype(doctype=None): + return getDOMImplementation().createDocument(None, "doc", doctype) + +def create_nonempty_doctype(): + doctype = getDOMImplementation().createDocumentType("doc", None, None) + doctype.entities._seq = [] + doctype.notations._seq = [] + notation = xml.dom.minidom.Notation("my-notation", None, + "http://xml.python.org/notations/my") + doctype.notations._seq.append(notation) + entity = xml.dom.minidom.Entity("my-entity", None, + "http://xml.python.org/entities/my", + "my-notation") + entity.version = "1.0" + entity.encoding = "utf-8" + entity.actualEncoding = "us-ascii" + doctype.entities._seq.append(entity) + return doctype + +def create_doc_with_doctype(): + doctype = create_nonempty_doctype() + doc = create_doc_without_doctype(doctype) + doctype.entities.item(0).ownerDocument = doc + doctype.notations.item(0).ownerDocument = doc + return doc + +class MinidomTest(unittest.TestCase): + def confirm(self, test, testname = "Test"): + self.assertTrue(test, testname) + + def checkWholeText(self, node, s): + t = node.wholeText + self.assertEqual(t, s, "looking for %r, found %r" % (s, t)) + + def testDocumentAsyncAttr(self): + doc = Document() + self.assertFalse(doc.async_) + self.assertFalse(Document.async_) + + def testParseFromBinaryFile(self): + with open(tstfile, 'rb') as file: + dom = parse(file) + dom.unlink() + self.assertIsInstance(dom, Document) + + def testParseFromTextFile(self): + with open(tstfile, 'r', encoding='iso-8859-1') as file: + dom = parse(file) + dom.unlink() + self.assertIsInstance(dom, Document) + + def testAttrModeSetsParamsAsAttrs(self): + attr = Attr("qName", "namespaceURI", "localName", "prefix") + self.assertEqual(attr.name, "qName") + self.assertEqual(attr.namespaceURI, "namespaceURI") + self.assertEqual(attr.prefix, "prefix") + self.assertEqual(attr.localName, "localName") + + def testAttrModeSetsNonOptionalAttrs(self): + attr = Attr("qName", "namespaceURI", None, "prefix") + self.assertEqual(attr.name, "qName") + self.assertEqual(attr.namespaceURI, "namespaceURI") + self.assertEqual(attr.prefix, "prefix") + self.assertEqual(attr.localName, attr.name) + + def testGetElementsByTagName(self): + dom = parse(tstfile) + self.assertEqual(dom.getElementsByTagName("LI"), + dom.documentElement.getElementsByTagName("LI")) + dom.unlink() + + def testInsertBefore(self): + dom = parseString("") + root = dom.documentElement + elem = root.childNodes[0] + nelem = dom.createElement("element") + root.insertBefore(nelem, elem) + self.assertEqual(len(root.childNodes), 2) + self.assertEqual(root.childNodes.length, 2) + self.assertIs(root.childNodes[0], nelem) + self.assertIs(root.childNodes.item(0), nelem) + self.assertIs(root.childNodes[1], elem) + self.assertIs(root.childNodes.item(1), elem) + self.assertIs(root.firstChild, nelem) + self.assertIs(root.lastChild, elem) + self.assertEqual(root.toxml(), "") + nelem = dom.createElement("element") + root.insertBefore(nelem, None) + self.assertEqual(len(root.childNodes), 3) + self.assertEqual(root.childNodes.length, 3) + self.assertIs(root.childNodes[1], elem) + self.assertIs(root.childNodes.item(1), elem) + self.assertIs(root.childNodes[2], nelem) + self.assertIs(root.childNodes.item(2), nelem) + self.assertIs(root.lastChild, nelem) + self.assertIs(nelem.previousSibling, elem) + self.assertEqual(root.toxml(), "") + nelem2 = dom.createElement("bar") + root.insertBefore(nelem2, nelem) + self.assertEqual(len(root.childNodes), 4) + self.assertEqual(root.childNodes.length, 4) + self.assertIs(root.childNodes[2], nelem2) + self.assertIs(root.childNodes.item(2), nelem2) + self.assertIs(root.childNodes[3], nelem) + self.assertIs(root.childNodes.item(3), nelem) + self.assertIs(nelem2.nextSibling, nelem) + self.assertIs(nelem.previousSibling, nelem2) + self.assertEqual(root.toxml(), + "") + dom.unlink() + + def _create_fragment_test_nodes(self): + dom = parseString("") + orig = dom.createTextNode("original") + c1 = dom.createTextNode("foo") + c2 = dom.createTextNode("bar") + c3 = dom.createTextNode("bat") + dom.documentElement.appendChild(orig) + frag = dom.createDocumentFragment() + frag.appendChild(c1) + frag.appendChild(c2) + frag.appendChild(c3) + return dom, orig, c1, c2, c3, frag + + def testInsertBeforeFragment(self): + dom, orig, c1, c2, c3, frag = self._create_fragment_test_nodes() + dom.documentElement.insertBefore(frag, None) + self.assertTupleEqual(tuple(dom.documentElement.childNodes), + (orig, c1, c2, c3), + "insertBefore(, None)") + frag.unlink() + dom.unlink() + + dom, orig, c1, c2, c3, frag = self._create_fragment_test_nodes() + dom.documentElement.insertBefore(frag, orig) + self.assertTupleEqual(tuple(dom.documentElement.childNodes), + (c1, c2, c3, orig), + "insertBefore(, orig)") + frag.unlink() + dom.unlink() + + def testAppendChild(self): + dom = parse(tstfile) + dom.documentElement.appendChild(dom.createComment("Hello")) + self.assertEqual(dom.documentElement.childNodes[-1].nodeName, "#comment") + self.assertEqual(dom.documentElement.childNodes[-1].data, "Hello") + dom.unlink() + + @support.requires_resource('cpu') + def testAppendChildNoQuadraticComplexity(self): + impl = getDOMImplementation() + + def work(n): + doc = impl.createDocument(None, "some_tag", None) + element = doc.documentElement + total_calls = 0 + + # Count attribute accesses as a proxy for work done + def getattribute_counter(self, attr): + nonlocal total_calls + total_calls += 1 + return object.__getattribute__(self, attr) + + with support.swap_attr(Element, "__getattribute__", getattribute_counter): + for _ in range(n): + child = doc.createElement("child") + element.appendChild(child) + element = child + return total_calls + + # Doubling N should not ~quadruple the work. + w1 = work(1024) + w2 = work(2048) + w3 = work(4096) + + self.assertGreater(w1, 0) + r1 = w2 / w1 + r2 = w3 / w2 + self.assertLess( + max(r1, r2), 3.2, + msg=f"Possible quadratic behavior: work={w1,w2,w3} ratios={r1,r2}" + ) + + def testSetAttributeNodeWithoutOwnerDocument(self): + # regression test for gh-142754 + elem = Element("test") + attr = Attr("id") + attr.value = "test-id" + elem.setAttributeNode(attr) + self.assertEqual(elem.getAttribute("id"), "test-id") + + def testAppendChildFragment(self): + dom, orig, c1, c2, c3, frag = self._create_fragment_test_nodes() + dom.documentElement.appendChild(frag) + self.assertTupleEqual(tuple(dom.documentElement.childNodes), + (orig, c1, c2, c3), + "appendChild()") + frag.unlink() + dom.unlink() + + def testReplaceChildFragment(self): + dom, orig, c1, c2, c3, frag = self._create_fragment_test_nodes() + dom.documentElement.replaceChild(frag, orig) + orig.unlink() + self.assertTupleEqual(tuple(dom.documentElement.childNodes), (c1, c2, c3), + "replaceChild()") + frag.unlink() + dom.unlink() + + def testLegalChildren(self): + dom = Document() + elem = dom.createElement('element') + text = dom.createTextNode('text') + self.assertRaises(xml.dom.HierarchyRequestErr, dom.appendChild, text) + + dom.appendChild(elem) + self.assertRaises(xml.dom.HierarchyRequestErr, dom.insertBefore, text, + elem) + self.assertRaises(xml.dom.HierarchyRequestErr, dom.replaceChild, text, + elem) + + nodemap = elem.attributes + self.assertRaises(xml.dom.HierarchyRequestErr, nodemap.setNamedItem, + text) + self.assertRaises(xml.dom.HierarchyRequestErr, nodemap.setNamedItemNS, + text) + + elem.appendChild(text) + dom.unlink() + + def testNamedNodeMapSetItem(self): + dom = Document() + elem = dom.createElement('element') + attrs = elem.attributes + attrs["foo"] = "bar" + a = attrs.item(0) + self.assertIs(a.ownerDocument, dom, + "NamedNodeMap.__setitem__() sets ownerDocument") + self.assertIs(a.ownerElement, elem, + "NamedNodeMap.__setitem__() sets ownerElement") + self.assertEqual(a.value, "bar", + "NamedNodeMap.__setitem__() sets value") + self.assertEqual(a.nodeValue, "bar", + "NamedNodeMap.__setitem__() sets nodeValue") + elem.unlink() + dom.unlink() + + def testNonZero(self): + dom = parse(tstfile) + self.assertTrue(dom) # should not be zero + dom.appendChild(dom.createComment("foo")) + self.assertFalse(dom.childNodes[-1].childNodes) + dom.unlink() + + def testUnlink(self): + dom = parse(tstfile) + self.assertTrue(dom.childNodes) + dom.unlink() + self.assertFalse(dom.childNodes) + + def testContext(self): + with parse(tstfile) as dom: + self.assertTrue(dom.childNodes) + self.assertFalse(dom.childNodes) + + def testElement(self): + dom = Document() + dom.appendChild(dom.createElement("abc")) + self.assertTrue(dom.documentElement) + dom.unlink() + + def testAAA(self): + dom = parseString("") + el = dom.documentElement + el.setAttribute("spam", "jam2") + self.assertEqual(el.toxml(), '', "testAAA") + a = el.getAttributeNode("spam") + self.assertIs(a.ownerDocument, dom, + "setAttribute() sets ownerDocument") + self.assertIs(a.ownerElement, dom.documentElement, + "setAttribute() sets ownerElement") + dom.unlink() + + def testAAB(self): + dom = parseString("") + el = dom.documentElement + el.setAttribute("spam", "jam") + el.setAttribute("spam", "jam2") + self.assertEqual(el.toxml(), '', "testAAB") + dom.unlink() + + def testAddAttr(self): + dom = Document() + child = dom.appendChild(dom.createElement("abc")) + + child.setAttribute("def", "ghi") + self.assertEqual(child.getAttribute("def"), "ghi") + self.assertEqual(child.attributes["def"].value, "ghi") + + child.setAttribute("jkl", "mno") + self.assertEqual(child.getAttribute("jkl"), "mno") + self.assertEqual(child.attributes["jkl"].value, "mno") + + self.assertEqual(len(child.attributes), 2) + + child.setAttribute("def", "newval") + self.assertEqual(child.getAttribute("def"), "newval") + self.assertEqual(child.attributes["def"].value, "newval") + + self.assertEqual(len(child.attributes), 2) + dom.unlink() + + def testDeleteAttr(self): + dom = Document() + child = dom.appendChild(dom.createElement("abc")) + + self.assertEqual(len(child.attributes), 0) + child.setAttribute("def", "ghi") + self.assertEqual(len(child.attributes), 1) + del child.attributes["def"] + self.assertEqual(len(child.attributes), 0) + dom.unlink() + + def testRemoveAttr(self): + dom = Document() + child = dom.appendChild(dom.createElement("abc")) + + child.setAttribute("def", "ghi") + self.assertEqual(len(child.attributes), 1) + self.assertRaises(xml.dom.NotFoundErr, child.removeAttribute, "foo") + child.removeAttribute("def") + self.assertEqual(len(child.attributes), 0) + dom.unlink() + + def testRemoveAttrNS(self): + dom = Document() + child = dom.appendChild( + dom.createElementNS("http://www.python.org", "python:abc")) + child.setAttributeNS("http://www.w3.org", "xmlns:python", + "http://www.python.org") + child.setAttributeNS("http://www.python.org", "python:abcattr", "foo") + self.assertRaises(xml.dom.NotFoundErr, child.removeAttributeNS, + "foo", "http://www.python.org") + self.assertEqual(len(child.attributes), 2) + child.removeAttributeNS("http://www.python.org", "abcattr") + self.assertEqual(len(child.attributes), 1) + dom.unlink() + + def testRemoveAttributeNode(self): + dom = Document() + child = dom.appendChild(dom.createElement("foo")) + child.setAttribute("spam", "jam") + self.assertEqual(len(child.attributes), 1) + node = child.getAttributeNode("spam") + self.assertRaises(xml.dom.NotFoundErr, child.removeAttributeNode, + None) + self.assertIs(node, child.removeAttributeNode(node)) + self.assertEqual(len(child.attributes), 0) + self.assertIsNone(child.getAttributeNode("spam")) + dom2 = Document() + child2 = dom2.appendChild(dom2.createElement("foo")) + node2 = child2.getAttributeNode("spam") + self.assertRaises(xml.dom.NotFoundErr, child2.removeAttributeNode, + node2) + dom.unlink() + + def testHasAttribute(self): + dom = Document() + child = dom.appendChild(dom.createElement("foo")) + child.setAttribute("spam", "jam") + self.assertTrue(child.hasAttribute("spam")) + + def testChangeAttr(self): + dom = parseString("") + el = dom.documentElement + el.setAttribute("spam", "jam") + self.assertEqual(len(el.attributes), 1) + el.setAttribute("spam", "bam") + # Set this attribute to be an ID and make sure that doesn't change + # when changing the value: + el.setIdAttribute("spam") + self.assertEqual(len(el.attributes), 1) + self.assertEqual(el.attributes["spam"].value, "bam") + self.assertEqual(el.attributes["spam"].nodeValue, "bam") + self.assertEqual(el.getAttribute("spam"), "bam") + self.assertTrue(el.getAttributeNode("spam").isId) + el.attributes["spam"] = "ham" + self.assertEqual(len(el.attributes), 1) + self.assertEqual(el.attributes["spam"].value, "ham") + self.assertEqual(el.attributes["spam"].nodeValue, "ham") + self.assertEqual(el.getAttribute("spam"), "ham") + self.assertTrue(el.attributes["spam"].isId) + el.setAttribute("spam2", "bam") + self.assertEqual(len(el.attributes), 2) + self.assertEqual(el.attributes["spam"].value, "ham") + self.assertEqual(el.attributes["spam"].nodeValue, "ham") + self.assertEqual(el.getAttribute("spam"), "ham") + self.assertEqual(el.attributes["spam2"].value, "bam") + self.assertEqual(el.attributes["spam2"].nodeValue, "bam") + self.assertEqual(el.getAttribute("spam2"), "bam") + el.attributes["spam2"] = "bam2" + + self.assertEqual(len(el.attributes), 2) + self.assertEqual(el.attributes["spam"].value, "ham") + self.assertEqual(el.attributes["spam"].nodeValue, "ham") + self.assertEqual(el.getAttribute("spam"), "ham") + self.assertEqual(el.attributes["spam2"].value, "bam2") + self.assertEqual(el.attributes["spam2"].nodeValue, "bam2") + self.assertEqual(el.getAttribute("spam2"), "bam2") + dom.unlink() + + def testGetAttrList(self): + dom = parseString("") + self.addCleanup(dom.unlink) + el = dom.documentElement + el.setAttribute("spam", "jam") + self.assertEqual(len(el.attributes.items()), 1) + el.setAttribute("foo", "bar") + items = el.attributes.items() + self.assertEqual(len(items), 2) + self.assertIn(('spam', 'jam'), items) + self.assertIn(('foo', 'bar'), items) + + def testGetAttrValues(self): + dom = parseString("") + self.addCleanup(dom.unlink) + el = dom.documentElement + el.setAttribute("spam", "jam") + values = [x.value for x in el.attributes.values()] + self.assertIn("jam", values) + el.setAttribute("foo", "bar") + values = [x.value for x in el.attributes.values()] + self.assertIn("bar", values) + self.assertIn("jam", values) + + def testGetAttribute(self): + dom = Document() + child = dom.appendChild( + dom.createElementNS("http://www.python.org", "python:abc")) + self.assertEqual(child.getAttribute('missing'), '') + + def testGetAttributeNS(self): + dom = Document() + child = dom.appendChild( + dom.createElementNS("http://www.python.org", "python:abc")) + child.setAttributeNS("http://www.w3.org", "xmlns:python", + "http://www.python.org") + self.assertEqual(child.getAttributeNS("http://www.w3.org", "python"), + 'http://www.python.org') + self.assertEqual(child.getAttributeNS("http://www.w3.org", "other"), + '') + child2 = child.appendChild(dom.createElement('abc')) + self.assertEqual(child2.getAttributeNS("http://www.python.org", "missing"), + '') + + def testGetAttributeNode(self): pass + + def testGetElementsByTagNameNS(self): + d=""" + + """ + dom = parseString(d) + elems = dom.getElementsByTagNameNS("http://pyxml.sf.net/minidom", + "myelem") + self.assertEqual(len(elems), 1) + self.assertEqual(elems[0].namespaceURI, "http://pyxml.sf.net/minidom") + self.assertEqual(elems[0].localName, "myelem") + self.assertEqual(elems[0].prefix, "minidom") + self.assertEqual(elems[0].tagName, "minidom:myelem") + self.assertEqual(elems[0].nodeName, "minidom:myelem") + dom.unlink() + + def get_empty_nodelist_from_elements_by_tagName_ns_helper(self, doc, nsuri, + lname): + nodelist = doc.getElementsByTagNameNS(nsuri, lname) + self.assertEqual(len(nodelist), 0) + + def testGetEmptyNodeListFromElementsByTagNameNS(self): + doc = parseString('') + self.get_empty_nodelist_from_elements_by_tagName_ns_helper( + doc, 'http://xml.python.org/namespaces/a', 'localname') + self.get_empty_nodelist_from_elements_by_tagName_ns_helper( + doc, '*', 'splat') + self.get_empty_nodelist_from_elements_by_tagName_ns_helper( + doc, 'http://xml.python.org/namespaces/a', '*') + + doc = parseString('') + self.get_empty_nodelist_from_elements_by_tagName_ns_helper( + doc, "http://xml.python.org/splat", "not-there") + self.get_empty_nodelist_from_elements_by_tagName_ns_helper( + doc, "*", "not-there") + self.get_empty_nodelist_from_elements_by_tagName_ns_helper( + doc, "http://somewhere.else.net/not-there", "e") + + def testElementReprAndStr(self): + dom = Document() + el = dom.appendChild(dom.createElement("abc")) + string1 = repr(el) + string2 = str(el) + self.assertEqual(string1, string2) + dom.unlink() + + def testElementReprAndStrUnicode(self): + dom = Document() + el = dom.appendChild(dom.createElement("abc")) + string1 = repr(el) + string2 = str(el) + self.assertEqual(string1, string2) + dom.unlink() + + def testElementReprAndStrUnicodeNS(self): + dom = Document() + el = dom.appendChild( + dom.createElementNS("http://www.slashdot.org", "slash:abc")) + string1 = repr(el) + string2 = str(el) + self.assertEqual(string1, string2) + self.assertIn("slash:abc", string1) + dom.unlink() + + def testAttributeRepr(self): + dom = Document() + el = dom.appendChild(dom.createElement("abc")) + node = el.setAttribute("abc", "def") + self.assertEqual(str(node), repr(node)) + dom.unlink() + + def testWriteXML(self): + str = '' + dom = parseString(str) + domstr = dom.toxml() + dom.unlink() + self.assertEqual(str, domstr) + + def test_toxml_quote_text(self): + dom = Document() + elem = dom.appendChild(dom.createElement('elem')) + elem.appendChild(dom.createTextNode('&<>"')) + cr = elem.appendChild(dom.createElement('cr')) + cr.appendChild(dom.createTextNode('\r')) + crlf = elem.appendChild(dom.createElement('crlf')) + crlf.appendChild(dom.createTextNode('\r\n')) + lflf = elem.appendChild(dom.createElement('lflf')) + lflf.appendChild(dom.createTextNode('\n\n')) + ws = elem.appendChild(dom.createElement('ws')) + ws.appendChild(dom.createTextNode('\t\n\r ')) + domstr = dom.toxml() + dom.unlink() + self.assertEqual(domstr, '' + '&<>"' + '\r' + '\r\n' + '\n\n' + '\t\n\r ') + + def test_toxml_quote_attrib(self): + dom = Document() + elem = dom.appendChild(dom.createElement('elem')) + elem.setAttribute("a", '&<>"') + elem.setAttribute("cr", "\r") + elem.setAttribute("lf", "\n") + elem.setAttribute("crlf", "\r\n") + elem.setAttribute("lflf", "\n\n") + elem.setAttribute("ws", "\t\n\r ") + domstr = dom.toxml() + dom.unlink() + self.assertEqual(domstr, '' + '') + + def testAltNewline(self): + str = '\n\n' + dom = parseString(str) + domstr = dom.toprettyxml(newl="\r\n") + dom.unlink() + self.assertEqual(domstr, str.replace("\n", "\r\n")) + + def test_toprettyxml_with_text_nodes(self): + # see issue #4147, text nodes are not indented + decl = '\n' + self.assertEqual(parseString('A').toprettyxml(), + decl + 'A\n') + self.assertEqual(parseString('AA').toprettyxml(), + decl + '\n\tA\n\tA\n\n') + self.assertEqual(parseString('AA').toprettyxml(), + decl + '\n\tA\n\tA\n\n') + self.assertEqual(parseString('AA').toprettyxml(), + decl + '\n\tA\n\tA\n\n') + self.assertEqual(parseString('AAA').toprettyxml(), + decl + '\n\tA\n\tA\n\tA\n\n') + + def test_toprettyxml_with_adjacent_text_nodes(self): + # see issue #4147, adjacent text nodes are indented normally + dom = Document() + elem = dom.createElement('elem') + elem.appendChild(dom.createTextNode('TEXT')) + elem.appendChild(dom.createTextNode('TEXT')) + dom.appendChild(elem) + decl = '\n' + self.assertEqual(dom.toprettyxml(), + decl + '\n\tTEXT\n\tTEXT\n\n') + + def test_toprettyxml_preserves_content_of_text_node(self): + # see issue #4147 + for str in ('A', 'C'): + dom = parseString(str) + dom2 = parseString(dom.toprettyxml()) + self.assertEqual( + dom.getElementsByTagName('B')[0].childNodes[0].toxml(), + dom2.getElementsByTagName('B')[0].childNodes[0].toxml()) + + def testProcessingInstruction(self): + dom = parseString('') + pi = dom.documentElement.firstChild + self.assertEqual(pi.target, "mypi") + self.assertEqual(pi.data, "data \t\n ") + self.assertEqual(pi.nodeName, "mypi") + self.assertEqual(pi.nodeType, Node.PROCESSING_INSTRUCTION_NODE) + self.assertIsNone(pi.attributes) + self.assertFalse(pi.hasChildNodes()) + self.assertEqual(len(pi.childNodes), 0) + self.assertIsNone(pi.firstChild) + self.assertIsNone(pi.lastChild) + self.assertIsNone(pi.localName) + self.assertEqual(pi.namespaceURI, xml.dom.EMPTY_NAMESPACE) + + def testProcessingInstructionRepr(self): + dom = parseString('') + pi = dom.documentElement.firstChild + self.assertEqual(str(pi.nodeType), repr(pi.nodeType)) + + def testTextRepr(self): + dom = Document() + self.addCleanup(dom.unlink) + elem = dom.createElement("elem") + elem.appendChild(dom.createTextNode("foo")) + el = elem.firstChild + self.assertEqual(str(el), repr(el)) + self.assertEqual('', str(el)) + + def testWriteText(self): pass + + def testDocumentElement(self): pass + + def testTooManyDocumentElements(self): + doc = parseString("") + elem = doc.createElement("extra") + # Should raise an exception when adding an extra document element. + self.assertRaises(xml.dom.HierarchyRequestErr, doc.appendChild, elem) + elem.unlink() + doc.unlink() + + def testCreateElementNS(self): pass + + def testCreateAttributeNS(self): pass + + def testParse(self): pass + + def testParseString(self): pass + + def testComment(self): pass + + def testAttrListItem(self): pass + + def testAttrListItems(self): pass + + def testAttrListItemNS(self): pass + + def testAttrListKeys(self): pass + + def testAttrListKeysNS(self): pass + + def testRemoveNamedItem(self): + doc = parseString("") + e = doc.documentElement + attrs = e.attributes + a1 = e.getAttributeNode("a") + a2 = attrs.removeNamedItem("a") + self.assertTrue(a1.isSameNode(a2)) + self.assertRaises(xml.dom.NotFoundErr, attrs.removeNamedItem, "a") + + def testRemoveNamedItemNS(self): + doc = parseString("") + e = doc.documentElement + attrs = e.attributes + a1 = e.getAttributeNodeNS("http://xml.python.org/", "b") + a2 = attrs.removeNamedItemNS("http://xml.python.org/", "b") + self.assertTrue(a1.isSameNode(a2)) + self.assertRaises(xml.dom.NotFoundErr, attrs.removeNamedItemNS, + "http://xml.python.org/", "b") + + def testAttrListValues(self): pass + + def testAttrListLength(self): pass + + def testAttrList__getitem__(self): pass + + def testAttrList__setitem__(self): pass + + def testSetAttrValueandNodeValue(self): pass + + def testParseElement(self): pass + + def testParseAttributes(self): pass + + def testParseElementNamespaces(self): pass + + def testParseAttributeNamespaces(self): pass + + def testParseProcessingInstructions(self): pass + + def testChildNodes(self): pass + + def testFirstChild(self): pass + + def testHasChildNodes(self): + dom = parseString("") + doc = dom.documentElement + self.assertTrue(doc.hasChildNodes()) + dom2 = parseString("") + doc2 = dom2.documentElement + self.assertFalse(doc2.hasChildNodes()) + + def _testCloneElementCopiesAttributes(self, e1, e2, test): + attrs1 = e1.attributes + attrs2 = e2.attributes + keys1 = list(attrs1.keys()) + keys2 = list(attrs2.keys()) + keys1.sort() + keys2.sort() + self.assertEqual(keys1, keys2) + for i in range(len(keys1)): + a1 = attrs1.item(i) + a2 = attrs2.item(i) + self.assertIsNot(a1, a2) + self.assertEqual(a1.value, a2.value) + self.assertEqual(a1.nodeValue, a2.nodeValue) + self.assertEqual(a1.namespaceURI,a2.namespaceURI) + self.assertEqual(a1.localName, a2.localName) + self.assertIs(a2.ownerElement, e2) + + def _setupCloneElement(self, deep): + dom = parseString("") + root = dom.documentElement + clone = root.cloneNode(deep) + self._testCloneElementCopiesAttributes( + root, clone, "testCloneElement" + (deep and "Deep" or "Shallow")) + # mutilate the original so shared data is detected + root.tagName = root.nodeName = "MODIFIED" + root.setAttribute("attr", "NEW VALUE") + root.setAttribute("added", "VALUE") + return dom, clone + + def testCloneElementShallow(self): + dom, clone = self._setupCloneElement(0) + self.assertEqual(len(clone.childNodes), 0) + self.assertEqual(clone.childNodes.length, 0) + self.assertIsNone(clone.parentNode) + self.assertEqual(clone.toxml(), '') + + dom.unlink() + + def testCloneElementDeep(self): + dom, clone = self._setupCloneElement(1) + self.assertEqual(len(clone.childNodes), 1) + self.assertEqual(clone.childNodes.length, 1) + self.assertIsNone(clone.parentNode) + self.assertTrue(clone.toxml(), '') + dom.unlink() + + def testCloneDocumentShallow(self): + doc = parseString("\n" + "" + "\n" + "]>\n" + "") + doc2 = doc.cloneNode(0) + self.assertIsNone(doc2, + "testCloneDocumentShallow:" + " shallow cloning of documents makes no sense!") + + def testCloneDocumentDeep(self): + doc = parseString("\n" + "" + "\n" + "]>\n" + "") + doc2 = doc.cloneNode(1) + self.assertFalse((doc.isSameNode(doc2) or doc2.isSameNode(doc)), + "testCloneDocumentDeep: document objects not distinct") + self.assertEqual(len(doc.childNodes), len(doc2.childNodes), + "testCloneDocumentDeep: wrong number of Document children") + self.assertEqual(doc2.documentElement.nodeType, Node.ELEMENT_NODE, + "testCloneDocumentDeep: documentElement not an ELEMENT_NODE") + self.assertTrue(doc2.documentElement.ownerDocument.isSameNode(doc2), + "testCloneDocumentDeep: documentElement owner is not new document") + self.assertFalse(doc.documentElement.isSameNode(doc2.documentElement), + "testCloneDocumentDeep: documentElement should not be shared") + if doc.doctype is not None: + # check the doctype iff the original DOM maintained it + self.assertEqual(doc2.doctype.nodeType, Node.DOCUMENT_TYPE_NODE, + "testCloneDocumentDeep: doctype not a DOCUMENT_TYPE_NODE") + self.assertTrue(doc2.doctype.ownerDocument.isSameNode(doc2)) + self.assertFalse(doc.doctype.isSameNode(doc2.doctype)) + + def testCloneDocumentTypeDeepOk(self): + doctype = create_nonempty_doctype() + clone = doctype.cloneNode(1) + self.confirm(clone is not None + and clone.nodeName == doctype.nodeName + and clone.name == doctype.name + and clone.publicId == doctype.publicId + and clone.systemId == doctype.systemId + and len(clone.entities) == len(doctype.entities) + and clone.entities.item(len(clone.entities)) is None + and len(clone.notations) == len(doctype.notations) + and clone.notations.item(len(clone.notations)) is None + and len(clone.childNodes) == 0) + for i in range(len(doctype.entities)): + se = doctype.entities.item(i) + ce = clone.entities.item(i) + self.confirm((not se.isSameNode(ce)) + and (not ce.isSameNode(se)) + and ce.nodeName == se.nodeName + and ce.notationName == se.notationName + and ce.publicId == se.publicId + and ce.systemId == se.systemId + and ce.encoding == se.encoding + and ce.actualEncoding == se.actualEncoding + and ce.version == se.version) + for i in range(len(doctype.notations)): + sn = doctype.notations.item(i) + cn = clone.notations.item(i) + self.confirm((not sn.isSameNode(cn)) + and (not cn.isSameNode(sn)) + and cn.nodeName == sn.nodeName + and cn.publicId == sn.publicId + and cn.systemId == sn.systemId) + + def testCloneDocumentTypeDeepNotOk(self): + doc = create_doc_with_doctype() + clone = doc.doctype.cloneNode(1) + self.assertIsNone(clone) + + def testCloneDocumentTypeShallowOk(self): + doctype = create_nonempty_doctype() + clone = doctype.cloneNode(0) + self.confirm(clone is not None + and clone.nodeName == doctype.nodeName + and clone.name == doctype.name + and clone.publicId == doctype.publicId + and clone.systemId == doctype.systemId + and len(clone.entities) == 0 + and clone.entities.item(0) is None + and len(clone.notations) == 0 + and clone.notations.item(0) is None + and len(clone.childNodes) == 0) + + def testCloneDocumentTypeShallowNotOk(self): + doc = create_doc_with_doctype() + clone = doc.doctype.cloneNode(0) + self.assertIsNone(clone) + + def check_import_document(self, deep, testName): + doc1 = parseString("") + doc2 = parseString("") + self.assertRaises(xml.dom.NotSupportedErr, doc1.importNode, doc2, deep) + + def testImportDocumentShallow(self): + self.check_import_document(0, "testImportDocumentShallow") + + def testImportDocumentDeep(self): + self.check_import_document(1, "testImportDocumentDeep") + + def testImportDocumentTypeShallow(self): + src = create_doc_with_doctype() + target = create_doc_without_doctype() + self.assertRaises(xml.dom.NotSupportedErr, target.importNode, + src.doctype, 0) + + def testImportDocumentTypeDeep(self): + src = create_doc_with_doctype() + target = create_doc_without_doctype() + self.assertRaises(xml.dom.NotSupportedErr, target.importNode, + src.doctype, 1) + + # Testing attribute clones uses a helper, and should always be deep, + # even if the argument to cloneNode is false. + def check_clone_attribute(self, deep, testName): + doc = parseString("") + attr = doc.documentElement.getAttributeNode("attr") + self.assertIsNotNone(attr) + clone = attr.cloneNode(deep) + self.assertFalse(clone.isSameNode(attr)) + self.assertFalse(attr.isSameNode(clone)) + self.assertIsNone(clone.ownerElement, + testName + ": ownerElement should be None") + self.confirm(clone.ownerDocument.isSameNode(attr.ownerDocument), + testName + ": ownerDocument does not match") + self.confirm(clone.specified, + testName + ": cloned attribute must have specified == True") + + def testCloneAttributeShallow(self): + self.check_clone_attribute(0, "testCloneAttributeShallow") + + def testCloneAttributeDeep(self): + self.check_clone_attribute(1, "testCloneAttributeDeep") + + def check_clone_pi(self, deep, testName): + doc = parseString("") + pi = doc.firstChild + self.assertEqual(pi.nodeType, Node.PROCESSING_INSTRUCTION_NODE) + clone = pi.cloneNode(deep) + self.confirm(clone.target == pi.target + and clone.data == pi.data) + + def testClonePIShallow(self): + self.check_clone_pi(0, "testClonePIShallow") + + def testClonePIDeep(self): + self.check_clone_pi(1, "testClonePIDeep") + + def check_clone_node_entity(self, clone_document): + # bpo-35052: Test user data handler in cloneNode() on a document with + # an entity + document = xml.dom.minidom.parseString(""" + + ] + > + Don't let entities make you frown ⌣ + """.strip()) + + class Handler: + def handle(self, operation, key, data, src, dst): + self.operation = operation + self.key = key + self.data = data + self.src = src + self.dst = dst + + handler = Handler() + doctype = document.doctype + entity = doctype.entities['smile'] + entity.setUserData("key", "data", handler) + + if clone_document: + # clone Document + clone = document.cloneNode(deep=True) + + self.assertEqual(clone.documentElement.firstChild.wholeText, + "Don't let entities make you frown ☺") + operation = xml.dom.UserDataHandler.NODE_IMPORTED + dst = clone.doctype.entities['smile'] + else: + # clone DocumentType + with support.swap_attr(doctype, 'ownerDocument', None): + clone = doctype.cloneNode(deep=True) + + operation = xml.dom.UserDataHandler.NODE_CLONED + dst = clone.entities['smile'] + + self.assertEqual(handler.operation, operation) + self.assertEqual(handler.key, "key") + self.assertEqual(handler.data, "data") + self.assertIs(handler.src, entity) + self.assertIs(handler.dst, dst) + + def testCloneNodeEntity(self): + self.check_clone_node_entity(False) + self.check_clone_node_entity(True) + + def testNormalize(self): + doc = parseString("") + root = doc.documentElement + root.appendChild(doc.createTextNode("first")) + root.appendChild(doc.createTextNode("second")) + self.confirm(len(root.childNodes) == 2 + and root.childNodes.length == 2, + "testNormalize -- preparation") + doc.normalize() + self.confirm(len(root.childNodes) == 1 + and root.childNodes.length == 1 + and root.firstChild is root.lastChild + and root.firstChild.data == "firstsecond" + , "testNormalize -- result") + doc.unlink() + + doc = parseString("") + root = doc.documentElement + root.appendChild(doc.createTextNode("")) + doc.normalize() + self.confirm(len(root.childNodes) == 0 + and root.childNodes.length == 0, + "testNormalize -- single empty node removed") + doc.unlink() + + def testNormalizeCombineAndNextSibling(self): + doc = parseString("") + root = doc.documentElement + root.appendChild(doc.createTextNode("first")) + root.appendChild(doc.createTextNode("second")) + root.appendChild(doc.createElement("i")) + self.confirm(len(root.childNodes) == 3 + and root.childNodes.length == 3, + "testNormalizeCombineAndNextSibling -- preparation") + doc.normalize() + self.confirm(len(root.childNodes) == 2 + and root.childNodes.length == 2 + and root.firstChild.data == "firstsecond" + and root.firstChild is not root.lastChild + and root.firstChild.nextSibling is root.lastChild + and root.firstChild.previousSibling is None + and root.lastChild.previousSibling is root.firstChild + and root.lastChild.nextSibling is None + , "testNormalizeCombinedAndNextSibling -- result") + doc.unlink() + + def testNormalizeDeleteWithPrevSibling(self): + doc = parseString("") + root = doc.documentElement + root.appendChild(doc.createTextNode("first")) + root.appendChild(doc.createTextNode("")) + self.confirm(len(root.childNodes) == 2 + and root.childNodes.length == 2, + "testNormalizeDeleteWithPrevSibling -- preparation") + doc.normalize() + self.confirm(len(root.childNodes) == 1 + and root.childNodes.length == 1 + and root.firstChild.data == "first" + and root.firstChild is root.lastChild + and root.firstChild.nextSibling is None + and root.firstChild.previousSibling is None + , "testNormalizeDeleteWithPrevSibling -- result") + doc.unlink() + + def testNormalizeDeleteWithNextSibling(self): + doc = parseString("") + root = doc.documentElement + root.appendChild(doc.createTextNode("")) + root.appendChild(doc.createTextNode("second")) + self.confirm(len(root.childNodes) == 2 + and root.childNodes.length == 2, + "testNormalizeDeleteWithNextSibling -- preparation") + doc.normalize() + self.confirm(len(root.childNodes) == 1 + and root.childNodes.length == 1 + and root.firstChild.data == "second" + and root.firstChild is root.lastChild + and root.firstChild.nextSibling is None + and root.firstChild.previousSibling is None + , "testNormalizeDeleteWithNextSibling -- result") + doc.unlink() + + def testNormalizeDeleteWithTwoNonTextSiblings(self): + doc = parseString("") + root = doc.documentElement + root.appendChild(doc.createElement("i")) + root.appendChild(doc.createTextNode("")) + root.appendChild(doc.createElement("i")) + self.confirm(len(root.childNodes) == 3 + and root.childNodes.length == 3, + "testNormalizeDeleteWithTwoSiblings -- preparation") + doc.normalize() + self.confirm(len(root.childNodes) == 2 + and root.childNodes.length == 2 + and root.firstChild is not root.lastChild + and root.firstChild.nextSibling is root.lastChild + and root.firstChild.previousSibling is None + and root.lastChild.previousSibling is root.firstChild + and root.lastChild.nextSibling is None + , "testNormalizeDeleteWithTwoSiblings -- result") + doc.unlink() + + def testNormalizeDeleteAndCombine(self): + doc = parseString("") + root = doc.documentElement + root.appendChild(doc.createTextNode("")) + root.appendChild(doc.createTextNode("second")) + root.appendChild(doc.createTextNode("")) + root.appendChild(doc.createTextNode("fourth")) + root.appendChild(doc.createTextNode("")) + self.confirm(len(root.childNodes) == 5 + and root.childNodes.length == 5, + "testNormalizeDeleteAndCombine -- preparation") + doc.normalize() + self.confirm(len(root.childNodes) == 1 + and root.childNodes.length == 1 + and root.firstChild is root.lastChild + and root.firstChild.data == "secondfourth" + and root.firstChild.previousSibling is None + and root.firstChild.nextSibling is None + , "testNormalizeDeleteAndCombine -- result") + doc.unlink() + + def testNormalizeRecursion(self): + doc = parseString("" + "" + "" + "t" + # + #x + "" + "" + "" + "t2" + #x2 + "" + "t3" + #x3 + "" + # + "") + root = doc.documentElement + root.childNodes[0].appendChild(doc.createTextNode("")) + root.childNodes[0].appendChild(doc.createTextNode("x")) + root.childNodes[1].childNodes[0].appendChild(doc.createTextNode("x2")) + root.childNodes[1].appendChild(doc.createTextNode("x3")) + root.appendChild(doc.createTextNode("")) + self.confirm(len(root.childNodes) == 3 + and root.childNodes.length == 3 + and len(root.childNodes[0].childNodes) == 4 + and root.childNodes[0].childNodes.length == 4 + and len(root.childNodes[1].childNodes) == 3 + and root.childNodes[1].childNodes.length == 3 + and len(root.childNodes[1].childNodes[0].childNodes) == 2 + and root.childNodes[1].childNodes[0].childNodes.length == 2 + , "testNormalize2 -- preparation") + doc.normalize() + self.confirm(len(root.childNodes) == 2 + and root.childNodes.length == 2 + and len(root.childNodes[0].childNodes) == 2 + and root.childNodes[0].childNodes.length == 2 + and len(root.childNodes[1].childNodes) == 2 + and root.childNodes[1].childNodes.length == 2 + and len(root.childNodes[1].childNodes[0].childNodes) == 1 + and root.childNodes[1].childNodes[0].childNodes.length == 1 + , "testNormalize2 -- childNodes lengths") + self.confirm(root.childNodes[0].childNodes[1].data == "tx" + and root.childNodes[1].childNodes[0].childNodes[0].data == "t2x2" + and root.childNodes[1].childNodes[1].data == "t3x3" + , "testNormalize2 -- joined text fields") + self.confirm(root.childNodes[0].childNodes[1].nextSibling is None + and root.childNodes[0].childNodes[1].previousSibling + is root.childNodes[0].childNodes[0] + and root.childNodes[0].childNodes[0].previousSibling is None + and root.childNodes[0].childNodes[0].nextSibling + is root.childNodes[0].childNodes[1] + and root.childNodes[1].childNodes[1].nextSibling is None + and root.childNodes[1].childNodes[1].previousSibling + is root.childNodes[1].childNodes[0] + and root.childNodes[1].childNodes[0].previousSibling is None + and root.childNodes[1].childNodes[0].nextSibling + is root.childNodes[1].childNodes[1] + , "testNormalize2 -- sibling pointers") + doc.unlink() + + + def testBug0777884(self): + doc = parseString("text") + text = doc.documentElement.childNodes[0] + self.assertEqual(text.nodeType, Node.TEXT_NODE) + # Should run quietly, doing nothing. + text.normalize() + doc.unlink() + + def testBug1433694(self): + doc = parseString("t") + node = doc.documentElement + node.childNodes[1].nodeValue = "" + node.normalize() + self.assertIsNone(node.childNodes[-1].nextSibling, + "Final child's .nextSibling should be None") + + def testSiblings(self): + doc = parseString("text?") + root = doc.documentElement + (pi, text, elm) = root.childNodes + + self.confirm(pi.nextSibling is text and + pi.previousSibling is None and + text.nextSibling is elm and + text.previousSibling is pi and + elm.nextSibling is None and + elm.previousSibling is text, "testSiblings") + + doc.unlink() + + def testParents(self): + doc = parseString( + "") + root = doc.documentElement + elm1 = root.childNodes[0] + (elm2a, elm2b) = elm1.childNodes + elm3 = elm2b.childNodes[0] + + self.confirm(root.parentNode is doc and + elm1.parentNode is root and + elm2a.parentNode is elm1 and + elm2b.parentNode is elm1 and + elm3.parentNode is elm2b, "testParents") + doc.unlink() + + def testNodeListItem(self): + doc = parseString("") + children = doc.childNodes + docelem = children[0] + self.confirm(children[0] is children.item(0) + and children.item(1) is None + and docelem.childNodes.item(0) is docelem.childNodes[0] + and docelem.childNodes.item(1) is docelem.childNodes[1] + and docelem.childNodes.item(0).childNodes.item(0) is None, + "test NodeList.item()") + doc.unlink() + + def testEncodings(self): + doc = parseString('') + self.assertEqual(doc.toxml(), + '\u20ac') + self.assertEqual(doc.toxml('utf-8'), + b'\xe2\x82\xac') + self.assertEqual(doc.toxml('iso-8859-15'), + b'\xa4') + self.assertEqual(doc.toxml('us-ascii'), + b'') + self.assertEqual(doc.toxml('utf-16'), + '' + '\u20ac'.encode('utf-16')) + + # Verify that character decoding errors raise exceptions instead + # of crashing + with self.assertRaises((UnicodeDecodeError, ExpatError)): + parseString( + b'Comment \xe7a va ? Tr\xe8s bien ?' + ) + + doc.unlink() + + def testStandalone(self): + doc = parseString('') + self.assertEqual(doc.toxml(), + '\u20ac') + self.assertEqual(doc.toxml(standalone=None), + '\u20ac') + self.assertEqual(doc.toxml(standalone=True), + '\u20ac') + self.assertEqual(doc.toxml(standalone=False), + '\u20ac') + self.assertEqual(doc.toxml('utf-8', True), + b'' + b'\xe2\x82\xac') + + doc.unlink() + + class UserDataHandler: + called = 0 + def handle(self, operation, key, data, src, dst): + dst.setUserData(key, data + 1, self) + src.setUserData(key, None, None) + self.called = 1 + + def testUserData(self): + dom = Document() + n = dom.createElement('e') + self.assertIsNone(n.getUserData("foo")) + n.setUserData("foo", None, None) + self.assertIsNone(n.getUserData("foo")) + n.setUserData("foo", 12, 12) + n.setUserData("bar", 13, 13) + self.assertEqual(n.getUserData("foo"), 12) + self.assertEqual(n.getUserData("bar"), 13) + n.setUserData("foo", None, None) + self.assertIsNone(n.getUserData("foo")) + self.assertEqual(n.getUserData("bar"), 13) + + handler = self.UserDataHandler() + n.setUserData("bar", 12, handler) + c = n.cloneNode(1) + self.confirm(handler.called + and n.getUserData("bar") is None + and c.getUserData("bar") == 13) + n.unlink() + c.unlink() + dom.unlink() + + def checkRenameNodeSharedConstraints(self, doc, node): + # Make sure illegal NS usage is detected: + self.assertRaises(xml.dom.NamespaceErr, doc.renameNode, node, + "http://xml.python.org/ns", "xmlns:foo") + doc2 = parseString("") + self.assertRaises(xml.dom.WrongDocumentErr, doc2.renameNode, node, + xml.dom.EMPTY_NAMESPACE, "foo") + + def testRenameAttribute(self): + doc = parseString("") + elem = doc.documentElement + attrmap = elem.attributes + attr = elem.attributes['a'] + + # Simple renaming + attr = doc.renameNode(attr, xml.dom.EMPTY_NAMESPACE, "b") + self.confirm(attr.name == "b" + and attr.nodeName == "b" + and attr.localName is None + and attr.namespaceURI == xml.dom.EMPTY_NAMESPACE + and attr.prefix is None + and attr.value == "v" + and elem.getAttributeNode("a") is None + and elem.getAttributeNode("b").isSameNode(attr) + and attrmap["b"].isSameNode(attr) + and attr.ownerDocument.isSameNode(doc) + and attr.ownerElement.isSameNode(elem)) + + # Rename to have a namespace, no prefix + attr = doc.renameNode(attr, "http://xml.python.org/ns", "c") + self.confirm(attr.name == "c" + and attr.nodeName == "c" + and attr.localName == "c" + and attr.namespaceURI == "http://xml.python.org/ns" + and attr.prefix is None + and attr.value == "v" + and elem.getAttributeNode("a") is None + and elem.getAttributeNode("b") is None + and elem.getAttributeNode("c").isSameNode(attr) + and elem.getAttributeNodeNS( + "http://xml.python.org/ns", "c").isSameNode(attr) + and attrmap["c"].isSameNode(attr) + and attrmap[("http://xml.python.org/ns", "c")].isSameNode(attr)) + + # Rename to have a namespace, with prefix + attr = doc.renameNode(attr, "http://xml.python.org/ns2", "p:d") + self.confirm(attr.name == "p:d" + and attr.nodeName == "p:d" + and attr.localName == "d" + and attr.namespaceURI == "http://xml.python.org/ns2" + and attr.prefix == "p" + and attr.value == "v" + and elem.getAttributeNode("a") is None + and elem.getAttributeNode("b") is None + and elem.getAttributeNode("c") is None + and elem.getAttributeNodeNS( + "http://xml.python.org/ns", "c") is None + and elem.getAttributeNode("p:d").isSameNode(attr) + and elem.getAttributeNodeNS( + "http://xml.python.org/ns2", "d").isSameNode(attr) + and attrmap["p:d"].isSameNode(attr) + and attrmap[("http://xml.python.org/ns2", "d")].isSameNode(attr)) + + # Rename back to a simple non-NS node + attr = doc.renameNode(attr, xml.dom.EMPTY_NAMESPACE, "e") + self.confirm(attr.name == "e" + and attr.nodeName == "e" + and attr.localName is None + and attr.namespaceURI == xml.dom.EMPTY_NAMESPACE + and attr.prefix is None + and attr.value == "v" + and elem.getAttributeNode("a") is None + and elem.getAttributeNode("b") is None + and elem.getAttributeNode("c") is None + and elem.getAttributeNode("p:d") is None + and elem.getAttributeNodeNS( + "http://xml.python.org/ns", "c") is None + and elem.getAttributeNode("e").isSameNode(attr) + and attrmap["e"].isSameNode(attr)) + + self.assertRaises(xml.dom.NamespaceErr, doc.renameNode, attr, + "http://xml.python.org/ns", "xmlns") + self.checkRenameNodeSharedConstraints(doc, attr) + doc.unlink() + + def testRenameElement(self): + doc = parseString("") + elem = doc.documentElement + + # Simple renaming + elem = doc.renameNode(elem, xml.dom.EMPTY_NAMESPACE, "a") + self.confirm(elem.tagName == "a" + and elem.nodeName == "a" + and elem.localName is None + and elem.namespaceURI == xml.dom.EMPTY_NAMESPACE + and elem.prefix is None + and elem.ownerDocument.isSameNode(doc)) + + # Rename to have a namespace, no prefix + elem = doc.renameNode(elem, "http://xml.python.org/ns", "b") + self.confirm(elem.tagName == "b" + and elem.nodeName == "b" + and elem.localName == "b" + and elem.namespaceURI == "http://xml.python.org/ns" + and elem.prefix is None + and elem.ownerDocument.isSameNode(doc)) + + # Rename to have a namespace, with prefix + elem = doc.renameNode(elem, "http://xml.python.org/ns2", "p:c") + self.confirm(elem.tagName == "p:c" + and elem.nodeName == "p:c" + and elem.localName == "c" + and elem.namespaceURI == "http://xml.python.org/ns2" + and elem.prefix == "p" + and elem.ownerDocument.isSameNode(doc)) + + # Rename back to a simple non-NS node + elem = doc.renameNode(elem, xml.dom.EMPTY_NAMESPACE, "d") + self.confirm(elem.tagName == "d" + and elem.nodeName == "d" + and elem.localName is None + and elem.namespaceURI == xml.dom.EMPTY_NAMESPACE + and elem.prefix is None + and elem.ownerDocument.isSameNode(doc)) + + self.checkRenameNodeSharedConstraints(doc, elem) + doc.unlink() + + def testRenameOther(self): + # We have to create a comment node explicitly since not all DOM + # builders used with minidom add comments to the DOM. + doc = xml.dom.minidom.getDOMImplementation().createDocument( + xml.dom.EMPTY_NAMESPACE, "e", None) + node = doc.createComment("comment") + self.assertRaises(xml.dom.NotSupportedErr, doc.renameNode, node, + xml.dom.EMPTY_NAMESPACE, "foo") + doc.unlink() + + def testWholeText(self): + doc = parseString("a") + elem = doc.documentElement + text = elem.childNodes[0] + self.assertEqual(text.nodeType, Node.TEXT_NODE) + + self.checkWholeText(text, "a") + elem.appendChild(doc.createTextNode("b")) + self.checkWholeText(text, "ab") + elem.insertBefore(doc.createCDATASection("c"), text) + self.checkWholeText(text, "cab") + + # make sure we don't cross other nodes + splitter = doc.createComment("comment") + elem.appendChild(splitter) + text2 = doc.createTextNode("d") + elem.appendChild(text2) + self.checkWholeText(text, "cab") + self.checkWholeText(text2, "d") + + x = doc.createElement("x") + elem.replaceChild(x, splitter) + splitter = x + self.checkWholeText(text, "cab") + self.checkWholeText(text2, "d") + + x = doc.createProcessingInstruction("y", "z") + elem.replaceChild(x, splitter) + splitter = x + self.checkWholeText(text, "cab") + self.checkWholeText(text2, "d") + + elem.removeChild(splitter) + self.checkWholeText(text, "cabd") + self.checkWholeText(text2, "cabd") + + def testPatch1094164(self): + doc = parseString("") + elem = doc.documentElement + e = elem.firstChild + self.assertIs(e.parentNode, elem, "Before replaceChild()") + # Check that replacing a child with itself leaves the tree unchanged + elem.replaceChild(e, e) + self.assertIs(e.parentNode, elem, "After replaceChild()") + + def testReplaceWholeText(self): + def setup(): + doc = parseString("ad") + elem = doc.documentElement + text1 = elem.firstChild + text2 = elem.lastChild + splitter = text1.nextSibling + elem.insertBefore(doc.createTextNode("b"), splitter) + elem.insertBefore(doc.createCDATASection("c"), text1) + return doc, elem, text1, splitter, text2 + + doc, elem, text1, splitter, text2 = setup() + text = text1.replaceWholeText("new content") + self.checkWholeText(text, "new content") + self.checkWholeText(text2, "d") + self.assertEqual(len(elem.childNodes), 3) + + doc, elem, text1, splitter, text2 = setup() + text = text2.replaceWholeText("new content") + self.checkWholeText(text, "new content") + self.checkWholeText(text1, "cab") + self.assertEqual(len(elem.childNodes), 5) + + doc, elem, text1, splitter, text2 = setup() + text = text1.replaceWholeText("") + self.checkWholeText(text2, "d") + self.confirm(text is None + and len(elem.childNodes) == 2) + + def testSchemaType(self): + doc = parseString( + "\n" + " \n" + " \n" + "]>") + elem = doc.documentElement + # We don't want to rely on any specific loader at this point, so + # just make sure we can get to all the names, and that the + # DTD-based namespace is right. The names can vary by loader + # since each supports a different level of DTD information. + t = elem.schemaType + self.confirm(t.name is None + and t.namespace == xml.dom.EMPTY_NAMESPACE) + names = "id notid text enum ref refs ent ents nm nms".split() + for name in names: + a = elem.getAttributeNode(name) + t = a.schemaType + self.confirm(hasattr(t, "name") + and t.namespace == xml.dom.EMPTY_NAMESPACE) + + def testSetIdAttribute(self): + doc = parseString("") + e = doc.documentElement + a1 = e.getAttributeNode("a1") + a2 = e.getAttributeNode("a2") + self.confirm(doc.getElementById("v") is None + and not a1.isId + and not a2.isId) + e.setIdAttribute("a1") + self.confirm(e.isSameNode(doc.getElementById("v")) + and a1.isId + and not a2.isId) + e.setIdAttribute("a2") + self.confirm(e.isSameNode(doc.getElementById("v")) + and e.isSameNode(doc.getElementById("w")) + and a1.isId + and a2.isId) + # replace the a1 node; the new node should *not* be an ID + a3 = doc.createAttribute("a1") + a3.value = "v" + e.setAttributeNode(a3) + self.confirm(doc.getElementById("v") is None + and e.isSameNode(doc.getElementById("w")) + and not a1.isId + and a2.isId + and not a3.isId) + # renaming an attribute should not affect its ID-ness: + doc.renameNode(a2, xml.dom.EMPTY_NAMESPACE, "an") + self.confirm(e.isSameNode(doc.getElementById("w")) + and a2.isId) + + def testSetIdAttributeNS(self): + NS1 = "http://xml.python.org/ns1" + NS2 = "http://xml.python.org/ns2" + doc = parseString("") + e = doc.documentElement + a1 = e.getAttributeNodeNS(NS1, "a1") + a2 = e.getAttributeNodeNS(NS2, "a2") + self.confirm(doc.getElementById("v") is None + and not a1.isId + and not a2.isId) + e.setIdAttributeNS(NS1, "a1") + self.confirm(e.isSameNode(doc.getElementById("v")) + and a1.isId + and not a2.isId) + e.setIdAttributeNS(NS2, "a2") + self.confirm(e.isSameNode(doc.getElementById("v")) + and e.isSameNode(doc.getElementById("w")) + and a1.isId + and a2.isId) + # replace the a1 node; the new node should *not* be an ID + a3 = doc.createAttributeNS(NS1, "a1") + a3.value = "v" + e.setAttributeNode(a3) + self.assertTrue(e.isSameNode(doc.getElementById("w"))) + self.assertFalse(a1.isId) + self.assertTrue(a2.isId) + self.assertFalse(a3.isId) + self.assertIsNone(doc.getElementById("v")) + # renaming an attribute should not affect its ID-ness: + doc.renameNode(a2, xml.dom.EMPTY_NAMESPACE, "an") + self.confirm(e.isSameNode(doc.getElementById("w")) + and a2.isId) + + def testSetIdAttributeNode(self): + NS1 = "http://xml.python.org/ns1" + NS2 = "http://xml.python.org/ns2" + doc = parseString("") + e = doc.documentElement + a1 = e.getAttributeNodeNS(NS1, "a1") + a2 = e.getAttributeNodeNS(NS2, "a2") + self.confirm(doc.getElementById("v") is None + and not a1.isId + and not a2.isId) + e.setIdAttributeNode(a1) + self.confirm(e.isSameNode(doc.getElementById("v")) + and a1.isId + and not a2.isId) + e.setIdAttributeNode(a2) + self.confirm(e.isSameNode(doc.getElementById("v")) + and e.isSameNode(doc.getElementById("w")) + and a1.isId + and a2.isId) + # replace the a1 node; the new node should *not* be an ID + a3 = doc.createAttributeNS(NS1, "a1") + a3.value = "v" + e.setAttributeNode(a3) + self.assertTrue(e.isSameNode(doc.getElementById("w"))) + self.assertFalse(a1.isId) + self.assertTrue(a2.isId) + self.assertFalse(a3.isId) + self.assertIsNone(doc.getElementById("v")) + # renaming an attribute should not affect its ID-ness: + doc.renameNode(a2, xml.dom.EMPTY_NAMESPACE, "an") + self.confirm(e.isSameNode(doc.getElementById("w")) + and a2.isId) + + def assert_recursive_equal(self, doc, doc2): + stack = [(doc, doc2)] + while stack: + n1, n2 = stack.pop() + self.assertEqual(n1.nodeType, n2.nodeType) + self.assertEqual(len(n1.childNodes), len(n2.childNodes)) + self.assertEqual(n1.nodeName, n2.nodeName) + self.assertFalse(n1.isSameNode(n2)) + self.assertFalse(n2.isSameNode(n1)) + if n1.nodeType == Node.DOCUMENT_TYPE_NODE: + len(n1.entities) + len(n2.entities) + len(n1.notations) + len(n2.notations) + self.assertEqual(len(n1.entities), len(n2.entities)) + self.assertEqual(len(n1.notations), len(n2.notations)) + for i in range(len(n1.notations)): + # XXX this loop body doesn't seem to be executed? + no1 = n1.notations.item(i) + no2 = n1.notations.item(i) + self.assertEqual(no1.name, no2.name) + self.assertEqual(no1.publicId, no2.publicId) + self.assertEqual(no1.systemId, no2.systemId) + stack.append((no1, no2)) + for i in range(len(n1.entities)): + e1 = n1.entities.item(i) + e2 = n2.entities.item(i) + self.assertEqual(e1.notationName, e2.notationName) + self.assertEqual(e1.publicId, e2.publicId) + self.assertEqual(e1.systemId, e2.systemId) + stack.append((e1, e2)) + if n1.nodeType != Node.DOCUMENT_NODE: + self.assertTrue(n1.ownerDocument.isSameNode(doc)) + self.assertTrue(n2.ownerDocument.isSameNode(doc2)) + for i in range(len(n1.childNodes)): + stack.append((n1.childNodes[i], n2.childNodes[i])) + + def testPickledDocument(self): + doc = parseString(sample) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + s = pickle.dumps(doc, proto) + doc2 = pickle.loads(s) + self.assert_recursive_equal(doc, doc2) + + def testDeepcopiedDocument(self): + doc = parseString(sample) + doc2 = copy.deepcopy(doc) + self.assert_recursive_equal(doc, doc2) + + def testSerializeCommentNodeWithDoubleHyphen(self): + doc = create_doc_without_doctype() + doc.appendChild(doc.createComment("foo--bar")) + self.assertRaises(ValueError, doc.toxml) + + + def testEmptyXMLNSValue(self): + doc = parseString("\n" + "\n") + doc2 = parseString(doc.toxml()) + self.assertEqual(doc2.namespaceURI, xml.dom.EMPTY_NAMESPACE) + + def testExceptionOnSpacesInXMLNSValue(self): + with self.assertRaises((ValueError, ExpatError)): + parseString( + '' + + '' + ) + + def testDocRemoveChild(self): + doc = parse(tstfile) + title_tag = doc.documentElement.getElementsByTagName("TITLE")[0] + self.assertRaises( xml.dom.NotFoundErr, doc.removeChild, title_tag) + num_children_before = len(doc.childNodes) + doc.removeChild(doc.childNodes[0]) + num_children_after = len(doc.childNodes) + self.assertEqual(num_children_after, num_children_before - 1) + + def testProcessingInstructionNameError(self): + # wrong variable in .nodeValue property will + # lead to "NameError: name 'data' is not defined" + doc = parse(tstfile) + pi = doc.createProcessingInstruction("y", "z") + pi.nodeValue = "crash" + + def test_minidom_attribute_order(self): + xml_str = '' + doc = parseString(xml_str) + output = io.StringIO() + doc.writexml(output) + self.assertEqual(output.getvalue(), xml_str) + + def test_toxml_with_attributes_ordered(self): + xml_str = '' + doc = parseString(xml_str) + self.assertEqual(doc.toxml(), xml_str) + + def test_toprettyxml_with_attributes_ordered(self): + xml_str = '' + doc = parseString(xml_str) + self.assertEqual(doc.toprettyxml(), + '\n' + '\n') + + def test_toprettyxml_with_cdata(self): + xml_str = ']]>' + doc = parseString(xml_str) + self.assertEqual(doc.toprettyxml(), + '\n' + '\n' + '\t]]>\n' + '\n') + + def test_cdata_parsing(self): + xml_str = ']]>' + dom1 = parseString(xml_str) + self.checkWholeText(dom1.getElementsByTagName('node')[0].firstChild, '') + dom2 = parseString(dom1.toprettyxml()) + self.checkWholeText(dom2.getElementsByTagName('node')[0].firstChild, '') + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_pulldom.py b/Lib/test/test_pulldom.py index 1308c73be..f91fa1f8a 100644 --- a/Lib/test/test_pulldom.py +++ b/Lib/test/test_pulldom.py @@ -23,8 +23,8 @@ SMALL_SAMPLE = """ class PullDOMTestCase(unittest.TestCase): - # TODO: RUSTPYTHON FileNotFoundError: [Errno 2] No such file or directory (os error 2): 'xmltestdata/test.xml' -> 'None' - @unittest.expectedFailure + + @unittest.expectedFailure # TODO: RUSTPYTHON; FileNotFoundError: [Errno 2] No such file or directory (os error 2): 'xmltestdata/test.xml' -> 'None' def test_parse(self): """Minimal test of DOMEventStream.parse()""" @@ -41,15 +41,14 @@ class PullDOMTestCase(unittest.TestCase): with open(tstfile, "rb") as fin: list(pulldom.parse(fin)) - # TODO: RUSTPYTHON implement DOM semantic - @unittest.expectedFailure + @unittest.expectedFailure # TODO: RUSTPYTHON; implement DOM semantic def test_parse_semantics(self): """Test DOMEventStream parsing semantics.""" items = pulldom.parseString(SMALL_SAMPLE) evt, node = next(items) # Just check the node is a Document: - self.assertTrue(hasattr(node, "createElement")) + self.assertHasAttr(node, "createElement") self.assertEqual(pulldom.START_DOCUMENT, evt) evt, node = next(items) self.assertEqual(pulldom.START_ELEMENT, evt) @@ -105,8 +104,7 @@ class PullDOMTestCase(unittest.TestCase): #evt, node = next(items) #self.assertEqual(pulldom.END_DOCUMENT, evt) - # TODO: RUSTPYTHON pulldom.parseString(SMALL_SAMPLE) return iterator with tuple with 2 elements - @unittest.expectedFailure + @unittest.expectedFailure # TODO: RUSTPYTHON; pulldom.parseString(SMALL_SAMPLE) return iterator with tuple with 2 elements def test_expandItem(self): """Ensure expandItem works as expected.""" items = pulldom.parseString(SMALL_SAMPLE) @@ -197,7 +195,7 @@ class ThoroughTestCase(unittest.TestCase): evt, node = next(pd) self.assertEqual(pulldom.START_DOCUMENT, evt) # Just check the node is a Document: - self.assertTrue(hasattr(node, "createElement")) + self.assertHasAttr(node, "createElement") if before_root: evt, node = next(pd) @@ -303,8 +301,7 @@ class SAX2DOMTestCase(unittest.TestCase): def confirm(self, test, testname="Test"): self.assertTrue(test, testname) - # TODO: RUSTPYTHON read from stream io - @unittest.expectedFailure + @unittest.expectedFailure # TODO: RUSTPYTHON; read from stream io def test_basic(self): """Ensure SAX2DOM can parse from a stream.""" with io.StringIO(SMALL_SAMPLE) as fin: diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index d360e8cd6..ec2be4d5c 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -1,14 +1,18 @@ # XXX TypeErrors on calling handlers, or on bad return values from a # handler, are obscure and unhelpful. +import abc +import functools import os +import re import sys import sysconfig +import textwrap import unittest import traceback from io import BytesIO from test import support -from test.support import os_helper +from test.support import import_helper, os_helper from xml.parsers import expat from xml.parsers.expat import errors @@ -261,7 +265,7 @@ class ParseTest(unittest.TestCase): operations = out.out self._verify_parse_output(operations) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_parse_again(self): parser = expat.ParserCreate() file = BytesIO(data) @@ -282,7 +286,7 @@ class NamespaceSeparatorTest(unittest.TestCase): expat.ParserCreate(namespace_separator=None) expat.ParserCreate(namespace_separator=' ') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_illegal(self): with self.assertRaisesRegex(TypeError, r"ParserCreate\(\) argument (2|'namespace_separator') " @@ -309,7 +313,7 @@ class NamespaceSeparatorTest(unittest.TestCase): class InterningTest(unittest.TestCase): - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test(self): # Test the interning machinery. p = expat.ParserCreate() @@ -325,7 +329,7 @@ class InterningTest(unittest.TestCase): # L should have the same string repeated over and over. self.assertTrue(tag is entry) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_issue9402(self): # create an ExternalEntityParserCreate with buffer text class ExternalOutputter: @@ -383,7 +387,7 @@ class BufferTextTest(unittest.TestCase): parser = expat.ParserCreate() self.assertFalse(parser.buffer_text) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_buffering_enabled(self): # Make sure buffering is turned on self.assertTrue(self.parser.buffer_text) @@ -391,7 +395,7 @@ class BufferTextTest(unittest.TestCase): self.assertEqual(self.stuff, ['123'], "buffered text not properly collapsed") - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test1(self): # XXX This test exposes more detail of Expat's text chunking than we # XXX like, but it tests what we need to concisely. @@ -401,7 +405,7 @@ class BufferTextTest(unittest.TestCase): ["", "1", "", "2", "\n", "3", "", "4\n5"], "buffering control not reacting as expected") - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test2(self): self.parser.Parse(b"1<2> \n 3", True) self.assertEqual(self.stuff, ["1<2> \n 3"], @@ -434,7 +438,7 @@ class BufferTextTest(unittest.TestCase): ["", "1", "", "", "2", "", "", "345", ""], "buffered text not properly split") - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test7(self): self.setHandlers(["CommentHandler", "EndElementHandler", "StartElementHandler"]) @@ -536,7 +540,7 @@ class PositionTest(unittest.TestCase): class sf1296433Test(unittest.TestCase): - @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: Expected type 'str' but 'bytes' found. + @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: Expected type 'str' but 'bytes' found. def test_parse_only_xml_data(self): # https://bugs.python.org/issue1296433 # @@ -560,15 +564,15 @@ class ChardataBufferTest(unittest.TestCase): test setting of chardata buffer size """ - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_1025_bytes(self): self.assertEqual(self.small_buffer_test(1025), 2) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_1000_bytes(self): self.assertEqual(self.small_buffer_test(1000), 1) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_wrong_size(self): parser = expat.ParserCreate() parser.buffer_text = 1 @@ -581,7 +585,7 @@ class ChardataBufferTest(unittest.TestCase): with self.assertRaises(TypeError): parser.buffer_size = 512.0 - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_unchanged_size(self): xml1 = b"" + b'a' * 512 xml2 = b'a'*512 + b'' @@ -605,7 +609,7 @@ class ChardataBufferTest(unittest.TestCase): self.assertEqual(self.n, 2) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_disabling_buffer(self): xml1 = b"" + b'a' * 512 xml2 = b'b' * 1024 @@ -650,7 +654,7 @@ class ChardataBufferTest(unittest.TestCase): parser.Parse(xml) return self.n - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_change_size_1(self): xml1 = b"" + b'a' * 1024 xml2 = b'aaa' + b'a' * 1025 + b'' @@ -667,7 +671,7 @@ class ChardataBufferTest(unittest.TestCase): parser.Parse(xml2, True) self.assertEqual(self.n, 2) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_change_size_2(self): xml1 = b"a" + b'a' * 1023 xml2 = b'aaa' + b'a' * 1025 + b'' @@ -684,8 +688,25 @@ class ChardataBufferTest(unittest.TestCase): parser.Parse(xml2, True) self.assertEqual(self.n, 4) +class ElementDeclHandlerTest(unittest.TestCase): + def test_trigger_leak(self): + # Unfixed, this test would leak the memory of the so-called + # "content model" in function ``my_ElementDeclHandler`` of pyexpat. + # See https://github.com/python/cpython/issues/140593. + data = textwrap.dedent('''\ + + ]> + + ''').encode('UTF-8') + + parser = expat.ParserCreate() + parser.NotStandaloneHandler = lambda: 1.234 # arbitrary float + parser.ElementDeclHandler = lambda _1, _2: None + self.assertRaises(TypeError, parser.Parse, data, True) + class MalformedInputTest(unittest.TestCase): - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test1(self): xml = b"\0\r\n" parser = expat.ParserCreate() @@ -695,7 +716,7 @@ class MalformedInputTest(unittest.TestCase): except expat.ExpatError as e: self.assertEqual(str(e), 'unclosed token: line 2, column 0') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test2(self): # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE) xml = b"\r\n" @@ -705,13 +726,13 @@ class MalformedInputTest(unittest.TestCase): parser.Parse(xml, True) class ErrorMessageTest(unittest.TestCase): - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_codes(self): # verify mapping of errors.codes and errors.messages self.assertEqual(errors.XML_ERROR_SYNTAX, errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_expaterror(self): xml = b'<' parser = expat.ParserCreate() @@ -727,7 +748,7 @@ class ForeignDTDTests(unittest.TestCase): """ Tests for the UseForeignDTD method of expat parser objects. """ - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_use_foreign_dtd(self): """ If UseForeignDTD is passed True and a document without an external @@ -756,7 +777,7 @@ class ForeignDTDTests(unittest.TestCase): parser.Parse(b"") self.assertEqual(handler_call_args, [(None, None)]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_ignore_use_foreign_dtd(self): """ If UseForeignDTD is passed True and a document with an external @@ -785,7 +806,7 @@ class ParentParserLifetimeTest(unittest.TestCase): See https://github.com/python/cpython/issues/139400. """ - @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'ExternalEntityParserCreate' + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'ExternalEntityParserCreate' def test_parent_parser_outlives_its_subparsers__single(self): parser = expat.ParserCreate() subparser = parser.ExternalEntityParserCreate(None) @@ -794,7 +815,7 @@ class ParentParserLifetimeTest(unittest.TestCase): # while it's still being referenced by a related subparser. del parser - @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'ExternalEntityParserCreate' + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'ExternalEntityParserCreate' def test_parent_parser_outlives_its_subparsers__multiple(self): parser = expat.ParserCreate() subparser_one = parser.ExternalEntityParserCreate(None) @@ -804,7 +825,7 @@ class ParentParserLifetimeTest(unittest.TestCase): # while it's still being referenced by a related subparser. del parser - @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'ExternalEntityParserCreate' + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'ExternalEntityParserCreate' def test_parent_parser_outlives_its_subparsers__chain(self): parser = expat.ParserCreate() subparser = parser.ExternalEntityParserCreate(None) @@ -817,7 +838,7 @@ class ParentParserLifetimeTest(unittest.TestCase): class ReparseDeferralTest(unittest.TestCase): - @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'GetReparseDeferralEnabled' + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'GetReparseDeferralEnabled' def test_getter_setter_round_trip(self): parser = expat.ParserCreate() enabled = (expat.version_info >= (2, 6, 0)) @@ -828,7 +849,7 @@ class ReparseDeferralTest(unittest.TestCase): parser.SetReparseDeferralEnabled(True) self.assertIs(parser.GetReparseDeferralEnabled(), enabled) - @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'GetReparseDeferralEnabled' + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'GetReparseDeferralEnabled' def test_reparse_deferral_enabled(self): if expat.version_info < (2, 6, 0): self.skipTest(f'Expat {expat.version_info} does not ' @@ -853,7 +874,7 @@ class ReparseDeferralTest(unittest.TestCase): self.assertEqual(started, ['doc']) - @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'SetReparseDeferralEnabled' + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'SetReparseDeferralEnabled' def test_reparse_deferral_disabled(self): started = [] @@ -873,5 +894,199 @@ class ReparseDeferralTest(unittest.TestCase): self.assertEqual(started, ['doc']) +class AttackProtectionTestBase(abc.ABC): + """ + Base class for testing protections against XML payloads with + disproportionate amplification. + + The protections being tested should detect and prevent attacks + that leverage disproportionate amplification from small inputs. + """ + + @staticmethod + def exponential_expansion_payload(*, nrows, ncols, text='.'): + """Create a billion laughs attack payload. + + Be careful: the number of total items is pow(n, k), thereby + requiring at least pow(ncols, nrows) * sizeof(text) memory! + """ + template = textwrap.dedent(f"""\ + + + + {{body}} + ]> + &row{nrows}; + """).rstrip() + + body = '\n'.join( + f'' + for i in range(nrows) + ) + body = textwrap.indent(body, ' ' * 4) + return template.format(body=body) + + def test_payload_generation(self): + # self-test for exponential_expansion_payload() + payload = self.exponential_expansion_payload(nrows=2, ncols=3) + self.assertEqual(payload, textwrap.dedent("""\ + + + + + + ]> + &row2; + """).rstrip()) + + def assert_root_parser_failure(self, func, /, *args, **kwargs): + """Check that func(*args, **kwargs) is invalid for a sub-parser.""" + msg = "parser must be a root parser" + self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs) + + @abc.abstractmethod + def assert_rejected(self, func, /, *args, **kwargs): + """Assert that func(*args, **kwargs) triggers the attack protection. + + Note: this method must ensure that the attack protection being tested + is the one that is actually triggered at runtime, e.g., by matching + the exact error message. + """ + + @abc.abstractmethod + def set_activation_threshold(self, parser, threshold): + """Set the activation threshold for the tested protection.""" + + @abc.abstractmethod + def set_maximum_amplification(self, parser, max_factor): + """Set the maximum amplification factor for the tested protection.""" + + @abc.abstractmethod + def test_set_activation_threshold__threshold_reached(self): + """Test when the activation threshold is exceeded.""" + + @abc.abstractmethod + def test_set_activation_threshold__threshold_not_reached(self): + """Test when the activation threshold is not exceeded.""" + + def test_set_activation_threshold__invalid_threshold_type(self): + parser = expat.ParserCreate() + setter = functools.partial(self.set_activation_threshold, parser) + + self.assertRaises(TypeError, setter, 1.0) + self.assertRaises(TypeError, setter, -1.5) + self.assertRaises(ValueError, setter, -5) + + def test_set_activation_threshold__invalid_threshold_range(self): + _testcapi = import_helper.import_module("_testcapi") + parser = expat.ParserCreate() + setter = functools.partial(self.set_activation_threshold, parser) + + self.assertRaises(OverflowError, setter, _testcapi.ULLONG_MAX + 1) + + def test_set_activation_threshold__fail_for_subparser(self): + parser = expat.ParserCreate() + subparser = parser.ExternalEntityParserCreate(None) + setter = functools.partial(self.set_activation_threshold, subparser) + self.assert_root_parser_failure(setter, 12345) + + @abc.abstractmethod + def test_set_maximum_amplification__amplification_exceeded(self): + """Test when the amplification factor is exceeded.""" + + @abc.abstractmethod + def test_set_maximum_amplification__amplification_not_exceeded(self): + """Test when the amplification factor is not exceeded.""" + + def test_set_maximum_amplification__infinity(self): + inf = float('inf') # an 'inf' threshold is allowed by Expat + parser = expat.ParserCreate() + self.assertIsNone(self.set_maximum_amplification(parser, inf)) + + def test_set_maximum_amplification__invalid_max_factor_type(self): + parser = expat.ParserCreate() + setter = functools.partial(self.set_maximum_amplification, parser) + + self.assertRaises(TypeError, setter, None) + self.assertRaises(TypeError, setter, 'abc') + + def test_set_maximum_amplification__invalid_max_factor_range(self): + parser = expat.ParserCreate() + setter = functools.partial(self.set_maximum_amplification, parser) + + msg = re.escape("'max_factor' must be at least 1.0") + self.assertRaisesRegex(expat.ExpatError, msg, setter, float('nan')) + self.assertRaisesRegex(expat.ExpatError, msg, setter, 0.99) + + def test_set_maximum_amplification__fail_for_subparser(self): + parser = expat.ParserCreate() + subparser = parser.ExternalEntityParserCreate(None) + setter = functools.partial(self.set_maximum_amplification, subparser) + self.assert_root_parser_failure(setter, 123.45) + + +@unittest.skipIf(expat.version_info < (2, 7, 2), "requires Expat >= 2.7.2") +class MemoryProtectionTest(AttackProtectionTestBase, unittest.TestCase): + + # NOTE: with the default Expat configuration, the billion laughs protection + # may hit before the allocation limiter if exponential_expansion_payload() + # is not carefully parametrized. As such, the payloads should be chosen so + # that either the allocation limiter is hit before other protections are + # triggered or no protection at all is triggered. + + def assert_rejected(self, func, /, *args, **kwargs): + """Check that func(*args, **kwargs) hits the allocation limit.""" + msg = r"out of memory: line \d+, column \d+" + self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs) + + def set_activation_threshold(self, parser, threshold): + return parser.SetAllocTrackerActivationThreshold(threshold) + + def set_maximum_amplification(self, parser, max_factor): + return parser.SetAllocTrackerMaximumAmplification(max_factor) + + def test_set_activation_threshold__threshold_reached(self): + parser = expat.ParserCreate() + # Choose a threshold expected to be always reached. + self.set_activation_threshold(parser, 3) + # Check that the threshold is reached by choosing a small factor + # and a payload whose peak amplification factor exceeds it. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + payload = self.exponential_expansion_payload(ncols=10, nrows=4) + self.assert_rejected(parser.Parse, payload, True) + + def test_set_activation_threshold__threshold_not_reached(self): + parser = expat.ParserCreate() + # Choose a threshold expected to be never reached. + self.set_activation_threshold(parser, pow(10, 5)) + # Check that the threshold is reached by choosing a small factor + # and a payload whose peak amplification factor exceeds it. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + payload = self.exponential_expansion_payload(ncols=10, nrows=4) + self.assertIsNotNone(parser.Parse(payload, True)) + + def test_set_maximum_amplification__amplification_exceeded(self): + parser = expat.ParserCreate() + # Unconditionally enable maximum activation factor. + self.set_activation_threshold(parser, 0) + # Choose a max amplification factor expected to always be exceeded. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + # Craft a payload for which the peak amplification factor is > 1.0. + payload = self.exponential_expansion_payload(ncols=1, nrows=2) + self.assert_rejected(parser.Parse, payload, True) + + def test_set_maximum_amplification__amplification_not_exceeded(self): + parser = expat.ParserCreate() + # Unconditionally enable maximum activation factor. + self.set_activation_threshold(parser, 0) + # Choose a max amplification factor expected to never be exceeded. + self.assertIsNone(self.set_maximum_amplification(parser, 1e4)) + # Craft a payload for which the peak amplification factor is < 1e4. + payload = self.exponential_expansion_payload(ncols=1, nrows=2) + self.assertIsNotNone(parser.Parse(payload, True)) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py new file mode 100644 index 000000000..5c10bcedc --- /dev/null +++ b/Lib/test/test_sax.py @@ -0,0 +1,1577 @@ +# regression test for SAX 2.0 + +from xml.sax import make_parser, ContentHandler, \ + SAXException, SAXReaderNotAvailable, SAXParseException +import unittest +from unittest import mock +try: + make_parser() +except SAXReaderNotAvailable: + # don't try to test this module if we cannot create a parser + raise unittest.SkipTest("no XML parsers available") +from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ + XMLFilterBase, prepare_input_source +from xml.sax.expatreader import create_parser +from xml.sax.handler import (feature_namespaces, feature_external_ges, + LexicalHandler) +from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl +from xml import sax +from io import BytesIO, StringIO +import codecs +import os.path +import pyexpat +import shutil +import sys +from urllib.error import URLError +import urllib.request +from test.support import os_helper +from test.support import findfile, check__all__ +from test.support.os_helper import FakePath, TESTFN + + +TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata") +TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata") +try: + TEST_XMLFILE.encode("utf-8") + TEST_XMLFILE_OUT.encode("utf-8") +except UnicodeEncodeError: + raise unittest.SkipTest("filename is not encodable to utf8") + +supports_nonascii_filenames = True +if not os.path.supports_unicode_filenames: + try: + os_helper.TESTFN_UNICODE.encode(sys.getfilesystemencoding()) + except (UnicodeError, TypeError): + # Either the file system encoding is None, or the file name + # cannot be encoded in the file system encoding. + supports_nonascii_filenames = False +requires_nonascii_filenames = unittest.skipUnless( + supports_nonascii_filenames, + 'Requires non-ascii filenames support') + +ns_uri = "http://www.python.org/xml-ns/saxtest/" + +class XmlTestBase(unittest.TestCase): + def verify_empty_attrs(self, attrs): + self.assertRaises(KeyError, attrs.getValue, "attr") + self.assertRaises(KeyError, attrs.getValueByQName, "attr") + self.assertRaises(KeyError, attrs.getNameByQName, "attr") + self.assertRaises(KeyError, attrs.getQNameByName, "attr") + self.assertRaises(KeyError, attrs.__getitem__, "attr") + self.assertEqual(attrs.getLength(), 0) + self.assertEqual(attrs.getNames(), []) + self.assertEqual(attrs.getQNames(), []) + self.assertEqual(len(attrs), 0) + self.assertNotIn("attr", attrs) + self.assertEqual(list(attrs.keys()), []) + self.assertEqual(attrs.get("attrs"), None) + self.assertEqual(attrs.get("attrs", 25), 25) + self.assertEqual(list(attrs.items()), []) + self.assertEqual(list(attrs.values()), []) + + def verify_empty_nsattrs(self, attrs): + self.assertRaises(KeyError, attrs.getValue, (ns_uri, "attr")) + self.assertRaises(KeyError, attrs.getValueByQName, "ns:attr") + self.assertRaises(KeyError, attrs.getNameByQName, "ns:attr") + self.assertRaises(KeyError, attrs.getQNameByName, (ns_uri, "attr")) + self.assertRaises(KeyError, attrs.__getitem__, (ns_uri, "attr")) + self.assertEqual(attrs.getLength(), 0) + self.assertEqual(attrs.getNames(), []) + self.assertEqual(attrs.getQNames(), []) + self.assertEqual(len(attrs), 0) + self.assertNotIn((ns_uri, "attr"), attrs) + self.assertEqual(list(attrs.keys()), []) + self.assertEqual(attrs.get((ns_uri, "attr")), None) + self.assertEqual(attrs.get((ns_uri, "attr"), 25), 25) + self.assertEqual(list(attrs.items()), []) + self.assertEqual(list(attrs.values()), []) + + def verify_attrs_wattr(self, attrs): + self.assertEqual(attrs.getLength(), 1) + self.assertEqual(attrs.getNames(), ["attr"]) + self.assertEqual(attrs.getQNames(), ["attr"]) + self.assertEqual(len(attrs), 1) + self.assertIn("attr", attrs) + self.assertEqual(list(attrs.keys()), ["attr"]) + self.assertEqual(attrs.get("attr"), "val") + self.assertEqual(attrs.get("attr", 25), "val") + self.assertEqual(list(attrs.items()), [("attr", "val")]) + self.assertEqual(list(attrs.values()), ["val"]) + self.assertEqual(attrs.getValue("attr"), "val") + self.assertEqual(attrs.getValueByQName("attr"), "val") + self.assertEqual(attrs.getNameByQName("attr"), "attr") + self.assertEqual(attrs["attr"], "val") + self.assertEqual(attrs.getQNameByName("attr"), "attr") + + +def xml_str(doc, encoding=None): + if encoding is None: + return doc + return '\n%s' % (encoding, doc) + +def xml_bytes(doc, encoding, decl_encoding=...): + if decl_encoding is ...: + decl_encoding = encoding + return xml_str(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace') + +def make_xml_file(doc, encoding, decl_encoding=...): + if decl_encoding is ...: + decl_encoding = encoding + with open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f: + f.write(xml_str(doc, decl_encoding)) + + +class ParseTest(unittest.TestCase): + data = '$\xa3\u20ac\U0001017b' + + def tearDown(self): + os_helper.unlink(TESTFN) + + def check_parse(self, f): + from xml.sax import parse + result = StringIO() + parse(f, XMLGenerator(result, 'utf-8')) + self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8')) + + def test_parse_text(self): + encodings = ('us-ascii', 'iso-8859-1', 'utf-8', + 'utf-16', 'utf-16le', 'utf-16be') + for encoding in encodings: + self.check_parse(StringIO(xml_str(self.data, encoding))) + make_xml_file(self.data, encoding) + with open(TESTFN, 'r', encoding=encoding) as f: + self.check_parse(f) + self.check_parse(StringIO(self.data)) + make_xml_file(self.data, encoding, None) + with open(TESTFN, 'r', encoding=encoding) as f: + self.check_parse(f) + + def test_parse_bytes(self): + # UTF-8 is default encoding, US-ASCII is compatible with UTF-8, + # UTF-16 is autodetected + encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be') + for encoding in encodings: + self.check_parse(BytesIO(xml_bytes(self.data, encoding))) + make_xml_file(self.data, encoding) + self.check_parse(TESTFN) + with open(TESTFN, 'rb') as f: + self.check_parse(f) + self.check_parse(BytesIO(xml_bytes(self.data, encoding, None))) + make_xml_file(self.data, encoding, None) + self.check_parse(TESTFN) + with open(TESTFN, 'rb') as f: + self.check_parse(f) + # accept UTF-8 with BOM + self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))) + make_xml_file(self.data, 'utf-8-sig', 'utf-8') + self.check_parse(TESTFN) + with open(TESTFN, 'rb') as f: + self.check_parse(f) + self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', None))) + make_xml_file(self.data, 'utf-8-sig', None) + self.check_parse(TESTFN) + with open(TESTFN, 'rb') as f: + self.check_parse(f) + # accept data with declared encoding + self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1'))) + make_xml_file(self.data, 'iso-8859-1') + self.check_parse(TESTFN) + with open(TESTFN, 'rb') as f: + self.check_parse(f) + # fail on non-UTF-8 incompatible data without declared encoding + with self.assertRaises(SAXException): + self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None))) + make_xml_file(self.data, 'iso-8859-1', None) + with self.assertRaises(SAXException): + self.check_parse(TESTFN) + with open(TESTFN, 'rb') as f: + with self.assertRaises(SAXException): + self.check_parse(f) + + def test_parse_path_object(self): + make_xml_file(self.data, 'utf-8', None) + self.check_parse(FakePath(TESTFN)) + + def test_parse_InputSource(self): + # accept data without declared but with explicitly specified encoding + make_xml_file(self.data, 'iso-8859-1', None) + with open(TESTFN, 'rb') as f: + input = InputSource() + input.setByteStream(f) + input.setEncoding('iso-8859-1') + self.check_parse(input) + + def test_parse_close_source(self): + builtin_open = open + fileobj = None + + def mock_open(*args): + nonlocal fileobj + fileobj = builtin_open(*args) + return fileobj + + with mock.patch('xml.sax.saxutils.open', side_effect=mock_open): + make_xml_file(self.data, 'iso-8859-1', None) + with self.assertRaises(SAXException): + self.check_parse(TESTFN) + self.assertTrue(fileobj.closed) + + def check_parseString(self, s): + from xml.sax import parseString + result = StringIO() + parseString(s, XMLGenerator(result, 'utf-8')) + self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8')) + + def test_parseString_text(self): + encodings = ('us-ascii', 'iso-8859-1', 'utf-8', + 'utf-16', 'utf-16le', 'utf-16be') + for encoding in encodings: + self.check_parseString(xml_str(self.data, encoding)) + self.check_parseString(self.data) + + def test_parseString_bytes(self): + # UTF-8 is default encoding, US-ASCII is compatible with UTF-8, + # UTF-16 is autodetected + encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be') + for encoding in encodings: + self.check_parseString(xml_bytes(self.data, encoding)) + self.check_parseString(xml_bytes(self.data, encoding, None)) + # accept UTF-8 with BOM + self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8')) + self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None)) + # accept data with declared encoding + self.check_parseString(xml_bytes(self.data, 'iso-8859-1')) + # fail on non-UTF-8 incompatible data without declared encoding + with self.assertRaises(SAXException): + self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None)) + +class MakeParserTest(unittest.TestCase): + def test_make_parser2(self): + # Creating parsers several times in a row should succeed. + # Testing this because there have been failures of this kind + # before. + from xml.sax import make_parser + p = make_parser() + from xml.sax import make_parser + p = make_parser() + from xml.sax import make_parser + p = make_parser() + from xml.sax import make_parser + p = make_parser() + from xml.sax import make_parser + p = make_parser() + from xml.sax import make_parser + p = make_parser() + + def test_make_parser3(self): + # Testing that make_parser can handle different types of + # iterables. + make_parser(['module']) + make_parser(('module', )) + make_parser({'module'}) + make_parser(frozenset({'module'})) + make_parser({'module': None}) + make_parser(iter(['module'])) + + def test_make_parser4(self): + # Testing that make_parser can handle empty iterables. + make_parser([]) + make_parser(tuple()) + make_parser(set()) + make_parser(frozenset()) + make_parser({}) + make_parser(iter([])) + + def test_make_parser5(self): + # Testing that make_parser can handle iterables with more than + # one item. + make_parser(['module1', 'module2']) + make_parser(('module1', 'module2')) + make_parser({'module1', 'module2'}) + make_parser(frozenset({'module1', 'module2'})) + make_parser({'module1': None, 'module2': None}) + make_parser(iter(['module1', 'module2'])) + +# =========================================================================== +# +# saxutils tests +# +# =========================================================================== + +class SaxutilsTest(unittest.TestCase): + # ===== escape + def test_escape_basic(self): + self.assertEqual(escape("Donald Duck & Co"), "Donald Duck & Co") + + def test_escape_all(self): + self.assertEqual(escape(""), + "<Donald Duck & Co>") + + def test_escape_extra(self): + self.assertEqual(escape("Hei på deg", {"å" : "å"}), + "Hei på deg") + + # ===== unescape + def test_unescape_basic(self): + self.assertEqual(unescape("Donald Duck & Co"), "Donald Duck & Co") + + def test_unescape_all(self): + self.assertEqual(unescape("<Donald Duck & Co>"), + "") + + def test_unescape_extra(self): + self.assertEqual(unescape("Hei på deg", {"å" : "å"}), + "Hei på deg") + + def test_unescape_amp_extra(self): + self.assertEqual(unescape("&foo;", {"&foo;": "splat"}), "&foo;") + + # ===== quoteattr + def test_quoteattr_basic(self): + self.assertEqual(quoteattr("Donald Duck & Co"), + '"Donald Duck & Co"') + + def test_single_quoteattr(self): + self.assertEqual(quoteattr('Includes "double" quotes'), + '\'Includes "double" quotes\'') + + def test_double_quoteattr(self): + self.assertEqual(quoteattr("Includes 'single' quotes"), + "\"Includes 'single' quotes\"") + + def test_single_double_quoteattr(self): + self.assertEqual(quoteattr("Includes 'single' and \"double\" quotes"), + "\"Includes 'single' and "double" quotes\"") + + # ===== make_parser + def test_make_parser(self): + # Creating a parser should succeed - it should fall back + # to the expatreader + p = make_parser(['xml.parsers.no_such_parser']) + + +class PrepareInputSourceTest(unittest.TestCase): + + def setUp(self): + self.file = os_helper.TESTFN + with open(self.file, "w") as tmp: + tmp.write("This was read from a file.") + + def tearDown(self): + os_helper.unlink(self.file) + + def make_byte_stream(self): + return BytesIO(b"This is a byte stream.") + + def make_character_stream(self): + return StringIO("This is a character stream.") + + def checkContent(self, stream, content): + self.assertIsNotNone(stream) + self.assertEqual(stream.read(), content) + stream.close() + + + def test_character_stream(self): + # If the source is an InputSource with a character stream, use it. + src = InputSource(self.file) + src.setCharacterStream(self.make_character_stream()) + prep = prepare_input_source(src) + self.assertIsNone(prep.getByteStream()) + self.checkContent(prep.getCharacterStream(), + "This is a character stream.") + + def test_byte_stream(self): + # If the source is an InputSource that does not have a character + # stream but does have a byte stream, use the byte stream. + src = InputSource(self.file) + src.setByteStream(self.make_byte_stream()) + prep = prepare_input_source(src) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This is a byte stream.") + + def test_system_id(self): + # If the source is an InputSource that has neither a character + # stream nor a byte stream, open the system ID. + src = InputSource(self.file) + prep = prepare_input_source(src) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This was read from a file.") + + def test_string(self): + # If the source is a string, use it as a system ID and open it. + prep = prepare_input_source(self.file) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This was read from a file.") + + def test_path_objects(self): + # If the source is a Path object, use it as a system ID and open it. + prep = prepare_input_source(FakePath(self.file)) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This was read from a file.") + + def test_binary_file(self): + # If the source is a binary file-like object, use it as a byte + # stream. + prep = prepare_input_source(self.make_byte_stream()) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This is a byte stream.") + + def test_text_file(self): + # If the source is a text file-like object, use it as a character + # stream. + prep = prepare_input_source(self.make_character_stream()) + self.assertIsNone(prep.getByteStream()) + self.checkContent(prep.getCharacterStream(), + "This is a character stream.") + + +# ===== XMLGenerator + +class XmlgenTest: + def test_xmlgen_basic(self): + result = self.ioclass() + gen = XMLGenerator(result) + gen.startDocument() + gen.startElement("doc", {}) + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), self.xml("")) + + def test_xmlgen_basic_empty(self): + result = self.ioclass() + gen = XMLGenerator(result, short_empty_elements=True) + gen.startDocument() + gen.startElement("doc", {}) + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), self.xml("")) + + def test_xmlgen_content(self): + result = self.ioclass() + gen = XMLGenerator(result) + + gen.startDocument() + gen.startElement("doc", {}) + gen.characters("huhei") + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), self.xml("huhei")) + + def test_xmlgen_content_empty(self): + result = self.ioclass() + gen = XMLGenerator(result, short_empty_elements=True) + + gen.startDocument() + gen.startElement("doc", {}) + gen.characters("huhei") + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), self.xml("huhei")) + + def test_xmlgen_pi(self): + result = self.ioclass() + gen = XMLGenerator(result) + + gen.startDocument() + gen.processingInstruction("test", "data") + gen.startElement("doc", {}) + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), + self.xml("")) + + def test_xmlgen_content_escape(self): + result = self.ioclass() + gen = XMLGenerator(result) + + gen.startDocument() + gen.startElement("doc", {}) + gen.characters("<huhei&")) + + def test_xmlgen_attr_escape(self): + result = self.ioclass() + gen = XMLGenerator(result) + + gen.startDocument() + gen.startElement("doc", {"a": '"'}) + gen.startElement("e", {"a": "'"}) + gen.endElement("e") + gen.startElement("e", {"a": "'\""}) + gen.endElement("e") + gen.startElement("e", {"a": "\n\r\t"}) + gen.endElement("e") + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), self.xml( + "" + "" + "")) + + def test_xmlgen_encoding(self): + encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig', + 'utf-16', 'utf-16be', 'utf-16le', + 'utf-32', 'utf-32be', 'utf-32le') + for encoding in encodings: + result = self.ioclass() + gen = XMLGenerator(result, encoding=encoding) + + gen.startDocument() + gen.startElement("doc", {"a": '\u20ac'}) + gen.characters("\u20ac") + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), + self.xml('\u20ac', encoding=encoding)) + + def test_xmlgen_unencodable(self): + result = self.ioclass() + gen = XMLGenerator(result, encoding='ascii') + + gen.startDocument() + gen.startElement("doc", {"a": '\u20ac'}) + gen.characters("\u20ac") + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), + self.xml('', encoding='ascii')) + + def test_xmlgen_ignorable(self): + result = self.ioclass() + gen = XMLGenerator(result) + + gen.startDocument() + gen.startElement("doc", {}) + gen.ignorableWhitespace(" ") + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), self.xml(" ")) + + def test_xmlgen_ignorable_empty(self): + result = self.ioclass() + gen = XMLGenerator(result, short_empty_elements=True) + + gen.startDocument() + gen.startElement("doc", {}) + gen.ignorableWhitespace(" ") + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), self.xml(" ")) + + def test_xmlgen_encoding_bytes(self): + encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig', + 'utf-16', 'utf-16be', 'utf-16le', + 'utf-32', 'utf-32be', 'utf-32le') + for encoding in encodings: + result = self.ioclass() + gen = XMLGenerator(result, encoding=encoding) + + gen.startDocument() + gen.startElement("doc", {"a": '\u20ac'}) + gen.characters("\u20ac".encode(encoding)) + gen.ignorableWhitespace(" ".encode(encoding)) + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), + self.xml('\u20ac ', encoding=encoding)) + + def test_xmlgen_ns(self): + result = self.ioclass() + gen = XMLGenerator(result) + + gen.startDocument() + gen.startPrefixMapping("ns1", ns_uri) + gen.startElementNS((ns_uri, "doc"), "ns1:doc", {}) + # add an unqualified name + gen.startElementNS((None, "udoc"), None, {}) + gen.endElementNS((None, "udoc"), None) + gen.endElementNS((ns_uri, "doc"), "ns1:doc") + gen.endPrefixMapping("ns1") + gen.endDocument() + + self.assertEqual(result.getvalue(), self.xml( + '' % + ns_uri)) + + def test_xmlgen_ns_empty(self): + result = self.ioclass() + gen = XMLGenerator(result, short_empty_elements=True) + + gen.startDocument() + gen.startPrefixMapping("ns1", ns_uri) + gen.startElementNS((ns_uri, "doc"), "ns1:doc", {}) + # add an unqualified name + gen.startElementNS((None, "udoc"), None, {}) + gen.endElementNS((None, "udoc"), None) + gen.endElementNS((ns_uri, "doc"), "ns1:doc") + gen.endPrefixMapping("ns1") + gen.endDocument() + + self.assertEqual(result.getvalue(), self.xml( + '' % + ns_uri)) + + def test_1463026_1(self): + result = self.ioclass() + gen = XMLGenerator(result) + + gen.startDocument() + gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'}) + gen.endElementNS((None, 'a'), 'a') + gen.endDocument() + + self.assertEqual(result.getvalue(), self.xml('')) + + def test_1463026_1_empty(self): + result = self.ioclass() + gen = XMLGenerator(result, short_empty_elements=True) + + gen.startDocument() + gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'}) + gen.endElementNS((None, 'a'), 'a') + gen.endDocument() + + self.assertEqual(result.getvalue(), self.xml('')) + + def test_1463026_2(self): + result = self.ioclass() + gen = XMLGenerator(result) + + gen.startDocument() + gen.startPrefixMapping(None, 'qux') + gen.startElementNS(('qux', 'a'), 'a', {}) + gen.endElementNS(('qux', 'a'), 'a') + gen.endPrefixMapping(None) + gen.endDocument() + + self.assertEqual(result.getvalue(), self.xml('')) + + def test_1463026_2_empty(self): + result = self.ioclass() + gen = XMLGenerator(result, short_empty_elements=True) + + gen.startDocument() + gen.startPrefixMapping(None, 'qux') + gen.startElementNS(('qux', 'a'), 'a', {}) + gen.endElementNS(('qux', 'a'), 'a') + gen.endPrefixMapping(None) + gen.endDocument() + + self.assertEqual(result.getvalue(), self.xml('')) + + def test_1463026_3(self): + result = self.ioclass() + gen = XMLGenerator(result) + + gen.startDocument() + gen.startPrefixMapping('my', 'qux') + gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'}) + gen.endElementNS(('qux', 'a'), 'a') + gen.endPrefixMapping('my') + gen.endDocument() + + self.assertEqual(result.getvalue(), + self.xml('')) + + def test_1463026_3_empty(self): + result = self.ioclass() + gen = XMLGenerator(result, short_empty_elements=True) + + gen.startDocument() + gen.startPrefixMapping('my', 'qux') + gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'}) + gen.endElementNS(('qux', 'a'), 'a') + gen.endPrefixMapping('my') + gen.endDocument() + + self.assertEqual(result.getvalue(), + self.xml('')) + + def test_5027_1(self): + # The xml prefix (as in xml:lang below) is reserved and bound by + # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had + # a bug whereby a KeyError is raised because this namespace is missing + # from a dictionary. + # + # This test demonstrates the bug by parsing a document. + test_xml = StringIO( + '' + '' + 'Hello' + '') + + parser = make_parser() + parser.setFeature(feature_namespaces, True) + result = self.ioclass() + gen = XMLGenerator(result) + parser.setContentHandler(gen) + parser.parse(test_xml) + + self.assertEqual(result.getvalue(), + self.xml( + '' + 'Hello' + '')) + + def test_5027_2(self): + # The xml prefix (as in xml:lang below) is reserved and bound by + # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had + # a bug whereby a KeyError is raised because this namespace is missing + # from a dictionary. + # + # This test demonstrates the bug by direct manipulation of the + # XMLGenerator. + result = self.ioclass() + gen = XMLGenerator(result) + + gen.startDocument() + gen.startPrefixMapping('a', 'http://example.com/ns') + gen.startElementNS(('http://example.com/ns', 'g1'), 'g1', {}) + lang_attr = {('http://www.w3.org/XML/1998/namespace', 'lang'): 'en'} + gen.startElementNS(('http://example.com/ns', 'g2'), 'g2', lang_attr) + gen.characters('Hello') + gen.endElementNS(('http://example.com/ns', 'g2'), 'g2') + gen.endElementNS(('http://example.com/ns', 'g1'), 'g1') + gen.endPrefixMapping('a') + gen.endDocument() + + self.assertEqual(result.getvalue(), + self.xml( + '' + 'Hello' + '')) + + def test_no_close_file(self): + result = self.ioclass() + def func(out): + gen = XMLGenerator(out) + gen.startDocument() + gen.startElement("doc", {}) + func(result) + self.assertFalse(result.closed) + + def test_xmlgen_fragment(self): + result = self.ioclass() + gen = XMLGenerator(result) + + # Don't call gen.startDocument() + gen.startElement("foo", {"a": "1.0"}) + gen.characters("Hello") + gen.endElement("foo") + gen.startElement("bar", {"b": "2.0"}) + gen.endElement("bar") + # Don't call gen.endDocument() + + self.assertEqual(result.getvalue(), + self.xml('Hello')[len(self.xml('')):]) + +class StringXmlgenTest(XmlgenTest, unittest.TestCase): + ioclass = StringIO + + def xml(self, doc, encoding='iso-8859-1'): + return '\n%s' % (encoding, doc) + + test_xmlgen_unencodable = None + +class BytesXmlgenTest(XmlgenTest, unittest.TestCase): + ioclass = BytesIO + + def xml(self, doc, encoding='iso-8859-1'): + return ('\n%s' % + (encoding, doc)).encode(encoding, 'xmlcharrefreplace') + +class WriterXmlgenTest(BytesXmlgenTest): + class ioclass(list): + write = list.append + closed = False + + def seekable(self): + return True + + def tell(self): + # return 0 at start and not 0 after start + return len(self) + + def getvalue(self): + return b''.join(self) + +class StreamWriterXmlgenTest(XmlgenTest, unittest.TestCase): + def ioclass(self): + raw = BytesIO() + writer = codecs.getwriter('ascii')(raw, 'xmlcharrefreplace') + writer.getvalue = raw.getvalue + return writer + + def xml(self, doc, encoding='iso-8859-1'): + return ('\n%s' % + (encoding, doc)).encode('ascii', 'xmlcharrefreplace') + +class StreamReaderWriterXmlgenTest(XmlgenTest, unittest.TestCase): + fname = os_helper.TESTFN + '-codecs' + + def ioclass(self): + with self.assertWarns(DeprecationWarning): + writer = codecs.open(self.fname, 'w', encoding='ascii', + errors='xmlcharrefreplace', buffering=0) + def cleanup(): + writer.close() + os_helper.unlink(self.fname) + self.addCleanup(cleanup) + def getvalue(): + # Windows will not let use reopen without first closing + writer.close() + with open(writer.name, 'rb') as f: + return f.read() + writer.getvalue = getvalue + return writer + + def xml(self, doc, encoding='iso-8859-1'): + return ('\n%s' % + (encoding, doc)).encode('ascii', 'xmlcharrefreplace') + +start = b'\n' + + +class XMLFilterBaseTest(unittest.TestCase): + def test_filter_basic(self): + result = BytesIO() + gen = XMLGenerator(result) + filter = XMLFilterBase() + filter.setContentHandler(gen) + + filter.startDocument() + filter.startElement("doc", {}) + filter.characters("content") + filter.ignorableWhitespace(" ") + filter.endElement("doc") + filter.endDocument() + + self.assertEqual(result.getvalue(), start + b"content ") + +# =========================================================================== +# +# expatreader tests +# +# =========================================================================== + +with open(TEST_XMLFILE_OUT, 'rb') as f: + xml_test_out = f.read() + +class ExpatReaderTest(XmlTestBase): + + # ===== XMLReader support + + def test_expat_binary_file(self): + parser = create_parser() + result = BytesIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + with open(TEST_XMLFILE, 'rb') as f: + parser.parse(f) + + self.assertEqual(result.getvalue(), xml_test_out) + + def test_expat_text_file(self): + parser = create_parser() + result = BytesIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f: + parser.parse(f) + + self.assertEqual(result.getvalue(), xml_test_out) + + @requires_nonascii_filenames + def test_expat_binary_file_nonascii(self): + fname = os_helper.TESTFN_UNICODE + shutil.copyfile(TEST_XMLFILE, fname) + self.addCleanup(os_helper.unlink, fname) + + parser = create_parser() + result = BytesIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + parser.parse(open(fname, 'rb')) + + self.assertEqual(result.getvalue(), xml_test_out) + + def test_expat_binary_file_bytes_name(self): + fname = os.fsencode(TEST_XMLFILE) + parser = create_parser() + result = BytesIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + with open(fname, 'rb') as f: + parser.parse(f) + + self.assertEqual(result.getvalue(), xml_test_out) + + def test_expat_binary_file_int_name(self): + parser = create_parser() + result = BytesIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + with open(TEST_XMLFILE, 'rb') as f: + with open(f.fileno(), 'rb', closefd=False) as f2: + parser.parse(f2) + + self.assertEqual(result.getvalue(), xml_test_out) + + # ===== DTDHandler support + + class TestDTDHandler: + + def __init__(self): + self._notations = [] + self._entities = [] + + def notationDecl(self, name, publicId, systemId): + self._notations.append((name, publicId, systemId)) + + def unparsedEntityDecl(self, name, publicId, systemId, ndata): + self._entities.append((name, publicId, systemId, ndata)) + + + class TestEntityRecorder: + def __init__(self): + self.entities = [] + + def resolveEntity(self, publicId, systemId): + self.entities.append((publicId, systemId)) + source = InputSource() + source.setPublicId(publicId) + source.setSystemId(systemId) + return source + + def test_expat_dtdhandler(self): + parser = create_parser() + handler = self.TestDTDHandler() + parser.setDTDHandler(handler) + + parser.feed('\n') + parser.feed(' \n') + parser.feed(']>\n') + parser.feed('') + parser.close() + + self.assertEqual(handler._notations, + [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)]) + self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")]) + + def test_expat_external_dtd_enabled(self): + # clear _opener global variable + self.addCleanup(urllib.request.urlcleanup) + + parser = create_parser() + parser.setFeature(feature_external_ges, True) + resolver = self.TestEntityRecorder() + parser.setEntityResolver(resolver) + + with self.assertRaises(URLError): + parser.feed( + '\n' + ) + self.assertEqual( + resolver.entities, [(None, 'unsupported://non-existing')] + ) + + def test_expat_external_dtd_default(self): + parser = create_parser() + resolver = self.TestEntityRecorder() + parser.setEntityResolver(resolver) + + parser.feed( + '\n' + ) + parser.feed('') + parser.close() + self.assertEqual(resolver.entities, []) + + # ===== EntityResolver support + + class TestEntityResolver: + + def resolveEntity(self, publicId, systemId): + inpsrc = InputSource() + inpsrc.setByteStream(BytesIO(b"")) + return inpsrc + + def test_expat_entityresolver_enabled(self): + parser = create_parser() + parser.setFeature(feature_external_ges, True) + parser.setEntityResolver(self.TestEntityResolver()) + result = BytesIO() + parser.setContentHandler(XMLGenerator(result)) + + parser.feed('\n') + parser.feed(']>\n') + parser.feed('&test;') + parser.close() + + self.assertEqual(result.getvalue(), start + + b"") + + def test_expat_entityresolver_default(self): + parser = create_parser() + self.assertEqual(parser.getFeature(feature_external_ges), False) + parser.setEntityResolver(self.TestEntityResolver()) + result = BytesIO() + parser.setContentHandler(XMLGenerator(result)) + + parser.feed('\n') + parser.feed(']>\n') + parser.feed('&test;') + parser.close() + + self.assertEqual(result.getvalue(), start + + b"") + + # ===== Attributes support + + class AttrGatherer(ContentHandler): + + def startElement(self, name, attrs): + self._attrs = attrs + + def startElementNS(self, name, qname, attrs): + self._attrs = attrs + + def test_expat_attrs_empty(self): + parser = create_parser() + gather = self.AttrGatherer() + parser.setContentHandler(gather) + + parser.feed("") + parser.close() + + self.verify_empty_attrs(gather._attrs) + + def test_expat_attrs_wattr(self): + parser = create_parser() + gather = self.AttrGatherer() + parser.setContentHandler(gather) + + parser.feed("") + parser.close() + + self.verify_attrs_wattr(gather._attrs) + + def test_expat_nsattrs_empty(self): + parser = create_parser(1) + gather = self.AttrGatherer() + parser.setContentHandler(gather) + + parser.feed("") + parser.close() + + self.verify_empty_nsattrs(gather._attrs) + + def test_expat_nsattrs_wattr(self): + parser = create_parser(1) + gather = self.AttrGatherer() + parser.setContentHandler(gather) + + parser.feed("" % ns_uri) + parser.close() + + attrs = gather._attrs + + self.assertEqual(attrs.getLength(), 1) + self.assertEqual(attrs.getNames(), [(ns_uri, "attr")]) + self.assertTrue((attrs.getQNames() == [] or + attrs.getQNames() == ["ns:attr"])) + self.assertEqual(len(attrs), 1) + self.assertIn((ns_uri, "attr"), attrs) + self.assertEqual(attrs.get((ns_uri, "attr")), "val") + self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val") + self.assertEqual(list(attrs.items()), [((ns_uri, "attr"), "val")]) + self.assertEqual(list(attrs.values()), ["val"]) + self.assertEqual(attrs.getValue((ns_uri, "attr")), "val") + self.assertEqual(attrs[(ns_uri, "attr")], "val") + + # ===== InputSource support + + def test_expat_inpsource_filename(self): + parser = create_parser() + result = BytesIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + parser.parse(TEST_XMLFILE) + + self.assertEqual(result.getvalue(), xml_test_out) + + def test_expat_inpsource_sysid(self): + parser = create_parser() + result = BytesIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + parser.parse(InputSource(TEST_XMLFILE)) + + self.assertEqual(result.getvalue(), xml_test_out) + + @requires_nonascii_filenames + def test_expat_inpsource_sysid_nonascii(self): + fname = os_helper.TESTFN_UNICODE + shutil.copyfile(TEST_XMLFILE, fname) + self.addCleanup(os_helper.unlink, fname) + + parser = create_parser() + result = BytesIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + parser.parse(InputSource(fname)) + + self.assertEqual(result.getvalue(), xml_test_out) + + def test_expat_inpsource_byte_stream(self): + parser = create_parser() + result = BytesIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + inpsrc = InputSource() + with open(TEST_XMLFILE, 'rb') as f: + inpsrc.setByteStream(f) + parser.parse(inpsrc) + + self.assertEqual(result.getvalue(), xml_test_out) + + def test_expat_inpsource_character_stream(self): + parser = create_parser() + result = BytesIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + inpsrc = InputSource() + with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f: + inpsrc.setCharacterStream(f) + parser.parse(inpsrc) + + self.assertEqual(result.getvalue(), xml_test_out) + + # ===== IncrementalParser support + + def test_expat_incremental(self): + result = BytesIO() + xmlgen = XMLGenerator(result) + parser = create_parser() + parser.setContentHandler(xmlgen) + + parser.feed("") + parser.feed("") + parser.close() + + self.assertEqual(result.getvalue(), start + b"") + + def test_expat_incremental_reset(self): + result = BytesIO() + xmlgen = XMLGenerator(result) + parser = create_parser() + parser.setContentHandler(xmlgen) + + parser.feed("") + parser.feed("text") + + result = BytesIO() + xmlgen = XMLGenerator(result) + parser.setContentHandler(xmlgen) + parser.reset() + + parser.feed("") + parser.feed("text") + parser.feed("") + parser.close() + + self.assertEqual(result.getvalue(), start + b"text") + + @unittest.skipIf(pyexpat.version_info < (2, 6, 0), + f'Expat {pyexpat.version_info} does not ' + 'support reparse deferral') + def test_flush_reparse_deferral_enabled(self): + result = BytesIO() + xmlgen = XMLGenerator(result) + parser = create_parser() + parser.setContentHandler(xmlgen) + + for chunk in (""): + parser.feed(chunk) + + self.assertEqual(result.getvalue(), start) # i.e. no elements started + self.assertTrue(parser._parser.GetReparseDeferralEnabled()) + + parser.flush() + + self.assertTrue(parser._parser.GetReparseDeferralEnabled()) + self.assertEqual(result.getvalue(), start + b"") + + parser.feed("") + parser.close() + + self.assertEqual(result.getvalue(), start + b"") + + def test_flush_reparse_deferral_disabled(self): + result = BytesIO() + xmlgen = XMLGenerator(result) + parser = create_parser() + parser.setContentHandler(xmlgen) + + for chunk in (""): + parser.feed(chunk) + + if pyexpat.version_info >= (2, 6, 0): + parser._parser.SetReparseDeferralEnabled(False) + self.assertEqual(result.getvalue(), start) # i.e. no elements started + + self.assertFalse(parser._parser.GetReparseDeferralEnabled()) + + parser.flush() + + self.assertFalse(parser._parser.GetReparseDeferralEnabled()) + self.assertEqual(result.getvalue(), start + b"") + + parser.feed("") + parser.close() + + self.assertEqual(result.getvalue(), start + b"") + + # ===== Locator support + + def test_expat_locator_noinfo(self): + result = BytesIO() + xmlgen = XMLGenerator(result) + parser = create_parser() + parser.setContentHandler(xmlgen) + + parser.feed("") + parser.feed("") + parser.close() + + self.assertEqual(parser.getSystemId(), None) + self.assertEqual(parser.getPublicId(), None) + self.assertEqual(parser.getLineNumber(), 1) + + def test_expat_locator_withinfo(self): + result = BytesIO() + xmlgen = XMLGenerator(result) + parser = create_parser() + parser.setContentHandler(xmlgen) + parser.parse(TEST_XMLFILE) + + self.assertEqual(parser.getSystemId(), TEST_XMLFILE) + self.assertEqual(parser.getPublicId(), None) + + @requires_nonascii_filenames + def test_expat_locator_withinfo_nonascii(self): + fname = os_helper.TESTFN_UNICODE + shutil.copyfile(TEST_XMLFILE, fname) + self.addCleanup(os_helper.unlink, fname) + + result = BytesIO() + xmlgen = XMLGenerator(result) + parser = create_parser() + parser.setContentHandler(xmlgen) + parser.parse(fname) + + self.assertEqual(parser.getSystemId(), fname) + self.assertEqual(parser.getPublicId(), None) + + +# =========================================================================== +# +# error reporting +# +# =========================================================================== + +class ErrorReportingTest(unittest.TestCase): + def test_expat_inpsource_location(self): + parser = create_parser() + parser.setContentHandler(ContentHandler()) # do nothing + source = InputSource() + source.setByteStream(BytesIO(b"")) #ill-formed + name = "a file name" + source.setSystemId(name) + try: + parser.parse(source) + self.fail() + except SAXException as e: + self.assertEqual(e.getSystemId(), name) + + def test_expat_incomplete(self): + parser = create_parser() + parser.setContentHandler(ContentHandler()) # do nothing + self.assertRaises(SAXParseException, parser.parse, StringIO("")) + self.assertEqual(parser.getColumnNumber(), 5) + self.assertEqual(parser.getLineNumber(), 1) + + def test_sax_parse_exception_str(self): + # pass various values from a locator to the SAXParseException to + # make sure that the __str__() doesn't fall apart when None is + # passed instead of an integer line and column number + # + # use "normal" values for the locator: + str(SAXParseException("message", None, + self.DummyLocator(1, 1))) + # use None for the line number: + str(SAXParseException("message", None, + self.DummyLocator(None, 1))) + # use None for the column number: + str(SAXParseException("message", None, + self.DummyLocator(1, None))) + # use None for both: + str(SAXParseException("message", None, + self.DummyLocator(None, None))) + + class DummyLocator: + def __init__(self, lineno, colno): + self._lineno = lineno + self._colno = colno + + def getPublicId(self): + return "pubid" + + def getSystemId(self): + return "sysid" + + def getLineNumber(self): + return self._lineno + + def getColumnNumber(self): + return self._colno + +# =========================================================================== +# +# xmlreader tests +# +# =========================================================================== + +class XmlReaderTest(XmlTestBase): + + # ===== AttributesImpl + def test_attrs_empty(self): + self.verify_empty_attrs(AttributesImpl({})) + + def test_attrs_wattr(self): + self.verify_attrs_wattr(AttributesImpl({"attr" : "val"})) + + def test_nsattrs_empty(self): + self.verify_empty_nsattrs(AttributesNSImpl({}, {})) + + def test_nsattrs_wattr(self): + attrs = AttributesNSImpl({(ns_uri, "attr") : "val"}, + {(ns_uri, "attr") : "ns:attr"}) + + self.assertEqual(attrs.getLength(), 1) + self.assertEqual(attrs.getNames(), [(ns_uri, "attr")]) + self.assertEqual(attrs.getQNames(), ["ns:attr"]) + self.assertEqual(len(attrs), 1) + self.assertIn((ns_uri, "attr"), attrs) + self.assertEqual(list(attrs.keys()), [(ns_uri, "attr")]) + self.assertEqual(attrs.get((ns_uri, "attr")), "val") + self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val") + self.assertEqual(list(attrs.items()), [((ns_uri, "attr"), "val")]) + self.assertEqual(list(attrs.values()), ["val"]) + self.assertEqual(attrs.getValue((ns_uri, "attr")), "val") + self.assertEqual(attrs.getValueByQName("ns:attr"), "val") + self.assertEqual(attrs.getNameByQName("ns:attr"), (ns_uri, "attr")) + self.assertEqual(attrs[(ns_uri, "attr")], "val") + self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr") + + +class LexicalHandlerTest(unittest.TestCase): + def setUp(self): + self.parser = None + + self.specified_version = '1.0' + self.specified_encoding = 'UTF-8' + self.specified_doctype = 'wish' + self.specified_entity_names = ('nbsp', 'source', 'target') + self.specified_comment = ('Comment in a DTD', + 'Really! You think so?') + self.test_data = StringIO() + self.test_data.write('\n'. + format(self.specified_version, + self.specified_encoding)) + self.test_data.write('\n'. + format(self.specified_comment[0])) + self.test_data.write('\n'. + format(self.specified_doctype)) + self.test_data.write('\n') + self.test_data.write('\n') + self.test_data.write('\n') + self.test_data.write('\n') + self.test_data.write('\n') + self.test_data.write('\n'. + format(self.specified_entity_names[0])) + self.test_data.write('\n'. + format(self.specified_entity_names[1])) + self.test_data.write('\n'. + format(self.specified_entity_names[2])) + self.test_data.write(']>\n') + self.test_data.write('<{}>'.format(self.specified_doctype)) + self.test_data.write('Aristotle\n') + self.test_data.write('Alexander\n') + self.test_data.write('Supplication\n') + self.test_data.write('Teach me patience!\n') + self.test_data.write('
&{};&{};&{};
\n'. + format(self.specified_entity_names[1], + self.specified_entity_names[0], + self.specified_entity_names[2])) + self.test_data.write('\n'.format(self.specified_comment[1])) + self.test_data.write('\n'.format(self.specified_doctype)) + self.test_data.seek(0) + + # Data received from handlers - to be validated + self.version = None + self.encoding = None + self.standalone = None + self.doctype = None + self.publicID = None + self.systemID = None + self.end_of_dtd = False + self.comments = [] + + def test_handlers(self): + class TestLexicalHandler(LexicalHandler): + def __init__(self, test_harness, *args, **kwargs): + super().__init__(*args, **kwargs) + self.test_harness = test_harness + + def startDTD(self, doctype, publicID, systemID): + self.test_harness.doctype = doctype + self.test_harness.publicID = publicID + self.test_harness.systemID = systemID + + def endDTD(self): + self.test_harness.end_of_dtd = True + + def comment(self, text): + self.test_harness.comments.append(text) + + self.parser = create_parser() + self.parser.setContentHandler(ContentHandler()) + self.parser.setProperty( + 'http://xml.org/sax/properties/lexical-handler', + TestLexicalHandler(self)) + source = InputSource() + source.setCharacterStream(self.test_data) + self.parser.parse(source) + self.assertEqual(self.doctype, self.specified_doctype) + self.assertIsNone(self.publicID) + self.assertIsNone(self.systemID) + self.assertTrue(self.end_of_dtd) + self.assertEqual(len(self.comments), + len(self.specified_comment)) + self.assertEqual(f' {self.specified_comment[0]} ', self.comments[0]) + + +class CDATAHandlerTest(unittest.TestCase): + def setUp(self): + self.parser = None + self.specified_chars = [] + self.specified_chars.append(('Parseable character data', False)) + self.specified_chars.append(('<> &% - assorted other XML junk.', True)) + self.char_index = 0 # Used to index specified results within handlers + self.test_data = StringIO() + self.test_data.write('\n') + self.test_data.write('\n') + self.test_data.write(f'{self.specified_chars[0][0]}\n') + self.test_data.write('\n') + self.test_data.write('\n') + self.test_data.write(f'\n') + self.test_data.write('\n') + self.test_data.write('\n') + self.test_data.seek(0) + + # Data received from handlers - to be validated + self.chardata = [] + self.in_cdata = False + + def test_handlers(self): + class TestLexicalHandler(LexicalHandler): + def __init__(self, test_harness, *args, **kwargs): + super().__init__(*args, **kwargs) + self.test_harness = test_harness + + def startCDATA(self): + self.test_harness.in_cdata = True + + def endCDATA(self): + self.test_harness.in_cdata = False + + class TestCharHandler(ContentHandler): + def __init__(self, test_harness, *args, **kwargs): + super().__init__(*args, **kwargs) + self.test_harness = test_harness + + def characters(self, content): + if content != '\n': + h = self.test_harness + t = h.specified_chars[h.char_index] + h.assertEqual(t[0], content) + h.assertEqual(t[1], h.in_cdata) + h.char_index += 1 + + self.parser = create_parser() + self.parser.setContentHandler(TestCharHandler(self)) + self.parser.setProperty( + 'http://xml.org/sax/properties/lexical-handler', + TestLexicalHandler(self)) + source = InputSource() + source.setCharacterStream(self.test_data) + self.parser.parse(source) + + self.assertFalse(self.in_cdata) + self.assertEqual(self.char_index, 2) + + +class TestModuleAll(unittest.TestCase): + def test_all(self): + extra = ( + 'ContentHandler', + 'ErrorHandler', + 'InputSource', + 'SAXException', + 'SAXNotRecognizedException', + 'SAXNotSupportedException', + 'SAXParseException', + 'SAXReaderNotAvailable', + ) + check__all__(self, sax, extra=extra) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 9d6d39307..543583fbd 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -155,9 +155,9 @@ class ModuleTest(unittest.TestCase): def test_sanity(self): # Import sanity. - from xml.etree import ElementTree - from xml.etree import ElementInclude - from xml.etree import ElementPath + from xml.etree import ElementTree # noqa: F401 + from xml.etree import ElementInclude # noqa: F401 + from xml.etree import ElementPath # noqa: F401 def test_all(self): names = ("xml.etree.ElementTree", "_elementtree") @@ -252,8 +252,7 @@ class ElementTreeTest(unittest.TestCase): self.assertTrue(ET.iselement(element), msg="not an element") direlem = dir(element) for attr in 'tag', 'attrib', 'text', 'tail': - self.assertTrue(hasattr(element, attr), - msg='no %s member' % attr) + self.assertHasAttr(element, attr) self.assertIn(attr, direlem, msg='no %s visible by dir' % attr) @@ -278,7 +277,7 @@ class ElementTreeTest(unittest.TestCase): # Make sure all standard element methods exist. def check_method(method): - self.assertTrue(hasattr(method, '__call__'), + self.assertHasAttr(method, '__call__', msg="%s not callable" % method) check_method(element.append) @@ -339,7 +338,7 @@ class ElementTreeTest(unittest.TestCase): element.attrib = {'A': 'B', 'C': 'D'} self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'}) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_simpleops(self): # Basic method sanity checks. @@ -372,9 +371,9 @@ class ElementTreeTest(unittest.TestCase): self.serialize_check(element, '') # 4 element.remove(subelement) self.serialize_check(element, '') # 5 - with self.assertRaises(ValueError) as cm: + with self.assertRaisesRegex(ValueError, + r'Element\.remove\(.+\): element not found'): element.remove(subelement) - self.assertEqual(str(cm.exception), 'list.remove(x): x not in list') self.serialize_check(element, '') # 6 element[0:0] = [subelement, subelement, subelement] self.serialize_check(element[1], '') @@ -394,7 +393,7 @@ class ElementTreeTest(unittest.TestCase): self.serialize_check(ET.XML(""), 'hello') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_file_init(self): stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8")) tree = ET.ElementTree(file=stringfile) @@ -510,7 +509,7 @@ class ElementTreeTest(unittest.TestCase): elem[:] = tuple([subelem]) self.serialize_check(elem, '') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_parsefile(self): # Test parsing from file. @@ -556,7 +555,7 @@ class ElementTreeTest(unittest.TestCase): ' \n' '
') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_parseliteral(self): element = ET.XML("text") self.assertEqual(ET.tostring(element, encoding='unicode'), @@ -579,210 +578,6 @@ class ElementTreeTest(unittest.TestCase): self.assertEqual(len(ids), 1) self.assertEqual(ids["body"].tag, 'body') - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_iterparse(self): - # Test iterparse interface. - - iterparse = ET.iterparse - - context = iterparse(SIMPLE_XMLFILE) - self.assertIsNone(context.root) - action, elem = next(context) - self.assertIsNone(context.root) - self.assertEqual((action, elem.tag), ('end', 'element')) - self.assertEqual([(action, elem.tag) for action, elem in context], [ - ('end', 'element'), - ('end', 'empty-element'), - ('end', 'root'), - ]) - self.assertEqual(context.root.tag, 'root') - - context = iterparse(SIMPLE_NS_XMLFILE) - self.assertEqual([(action, elem.tag) for action, elem in context], [ - ('end', '{namespace}element'), - ('end', '{namespace}element'), - ('end', '{namespace}empty-element'), - ('end', '{namespace}root'), - ]) - - with open(SIMPLE_XMLFILE, 'rb') as source: - context = iterparse(source) - action, elem = next(context) - self.assertEqual((action, elem.tag), ('end', 'element')) - self.assertEqual([(action, elem.tag) for action, elem in context], [ - ('end', 'element'), - ('end', 'empty-element'), - ('end', 'root'), - ]) - self.assertEqual(context.root.tag, 'root') - - events = () - context = iterparse(SIMPLE_XMLFILE, events) - self.assertEqual([(action, elem.tag) for action, elem in context], []) - - events = () - context = iterparse(SIMPLE_XMLFILE, events=events) - self.assertEqual([(action, elem.tag) for action, elem in context], []) - - events = ("start", "end") - context = iterparse(SIMPLE_XMLFILE, events) - self.assertEqual([(action, elem.tag) for action, elem in context], [ - ('start', 'root'), - ('start', 'element'), - ('end', 'element'), - ('start', 'element'), - ('end', 'element'), - ('start', 'empty-element'), - ('end', 'empty-element'), - ('end', 'root'), - ]) - - events = ("start", "end", "start-ns", "end-ns") - context = iterparse(SIMPLE_NS_XMLFILE, events) - self.assertEqual([(action, elem.tag) if action in ("start", "end") - else (action, elem) - for action, elem in context], [ - ('start-ns', ('', 'namespace')), - ('start', '{namespace}root'), - ('start', '{namespace}element'), - ('end', '{namespace}element'), - ('start', '{namespace}element'), - ('end', '{namespace}element'), - ('start', '{namespace}empty-element'), - ('end', '{namespace}empty-element'), - ('end', '{namespace}root'), - ('end-ns', None), - ]) - - events = ('start-ns', 'end-ns') - context = iterparse(io.StringIO(r""), events) - res = [action for action, elem in context] - self.assertEqual(res, ['start-ns', 'end-ns']) - - events = ("start", "end", "bogus") - with open(SIMPLE_XMLFILE, "rb") as f: - with self.assertRaises(ValueError) as cm: - iterparse(f, events) - self.assertFalse(f.closed) - self.assertEqual(str(cm.exception), "unknown event 'bogus'") - - with warnings_helper.check_no_resource_warning(self): - with self.assertRaises(ValueError) as cm: - iterparse(SIMPLE_XMLFILE, events) - self.assertEqual(str(cm.exception), "unknown event 'bogus'") - del cm - - source = io.BytesIO( - b"\n" - b"text\n") - events = ("start-ns",) - context = iterparse(source, events) - self.assertEqual([(action, elem) for action, elem in context], [ - ('start-ns', ('', 'http://\xe9ffbot.org/ns')), - ('start-ns', ('cl\xe9', 'http://effbot.org/ns')), - ]) - - source = io.StringIO("junk") - it = iterparse(source) - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'document')) - with self.assertRaises(ET.ParseError) as cm: - next(it) - self.assertEqual(str(cm.exception), - 'junk after document element: line 1, column 12') - - self.addCleanup(os_helper.unlink, TESTFN) - with open(TESTFN, "wb") as f: - f.write(b"junk") - it = iterparse(TESTFN) - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'document')) - with warnings_helper.check_no_resource_warning(self): - with self.assertRaises(ET.ParseError) as cm: - next(it) - self.assertEqual(str(cm.exception), - 'junk after document element: line 1, column 12') - del cm, it - - # Not exhausting the iterator still closes the resource (bpo-43292) - with warnings_helper.check_no_resource_warning(self): - it = iterparse(SIMPLE_XMLFILE) - del it - - with warnings_helper.check_no_resource_warning(self): - it = iterparse(SIMPLE_XMLFILE) - it.close() - del it - - with warnings_helper.check_no_resource_warning(self): - it = iterparse(SIMPLE_XMLFILE) - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'element')) - del it, elem - - with warnings_helper.check_no_resource_warning(self): - it = iterparse(SIMPLE_XMLFILE) - action, elem = next(it) - it.close() - self.assertEqual((action, elem.tag), ('end', 'element')) - del it, elem - - with self.assertRaises(FileNotFoundError): - iterparse("nonexistent") - - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_iterparse_close(self): - iterparse = ET.iterparse - - it = iterparse(SIMPLE_XMLFILE) - it.close() - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - - with open(SIMPLE_XMLFILE, 'rb') as source: - it = iterparse(source) - it.close() - self.assertFalse(source.closed) - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - - it = iterparse(SIMPLE_XMLFILE) - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'element')) - it.close() - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - - with open(SIMPLE_XMLFILE, 'rb') as source: - it = iterparse(source) - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'element')) - it.close() - self.assertFalse(source.closed) - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - - it = iterparse(SIMPLE_XMLFILE) - list(it) - it.close() - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - - with open(SIMPLE_XMLFILE, 'rb') as source: - it = iterparse(source) - list(it) - it.close() - self.assertFalse(source.closed) - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - def test_writefile(self): elem = ET.Element("tag") elem.text = "text" @@ -800,7 +595,7 @@ class ElementTreeTest(unittest.TestCase): elem[0] = ET.PI("key", "value") self.serialize_check(elem, 'textsubtext') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_custom_builder(self): # Test parser w. custom builder. @@ -862,7 +657,7 @@ class ElementTreeTest(unittest.TestCase): ('end-ns', ''), ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_custom_builder_only_end_ns(self): class Builder(list): def end_ns(self, prefix): @@ -894,7 +689,7 @@ class ElementTreeTest(unittest.TestCase): parser2 = ET.XMLParser() self.assertIsInstance(parser2.target, ET.TreeBuilder) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_children(self): # Test Element children iteration @@ -1174,7 +969,7 @@ class ElementTreeTest(unittest.TestCase): self.assertRegex(stringlist[0], r"^<\?xml version='1.0' encoding='.+'?>") self.assertEqual(['', '', ''], stringlist[1:]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_encoding(self): def check(encoding, body=''): xml = ("%s" % @@ -1256,7 +1051,7 @@ class ElementTreeTest(unittest.TestCase): self.assertEqual(serialize(e, method="html"), 'text') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_entity(self): # Test entity handling. @@ -1294,7 +1089,7 @@ class ElementTreeTest(unittest.TestCase): self.assertEqual(str(cm.exception), 'undefined entity &entity;: line 4, column 10') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_namespace(self): # Test namespace issues. @@ -1505,13 +1300,249 @@ class ElementTreeTest(unittest.TestCase): self.assertEqual(serialize(root, method='html'), '') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_attlist_default(self): # Test default attribute values; See BPO 42151. root = ET.fromstring(ATTLIST_XML) self.assertEqual(root[0].attrib, {'{http://www.w3.org/XML/1998/namespace}lang': 'eng'}) + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_iterparse(self): + return super().test_iterparse() + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_iterparse_close(self): + return super().test_iterparse_close() + + +class IterparseTest(unittest.TestCase): + # Test iterparse interface. + + def test_basic(self): + iterparse = ET.iterparse + + it = iterparse(SIMPLE_XMLFILE) + self.assertIsNone(it.root) + action, elem = next(it) + self.assertIsNone(it.root) + self.assertEqual((action, elem.tag), ('end', 'element')) + self.assertEqual([(action, elem.tag) for action, elem in it], [ + ('end', 'element'), + ('end', 'empty-element'), + ('end', 'root'), + ]) + self.assertEqual(it.root.tag, 'root') + it.close() + + it = iterparse(SIMPLE_NS_XMLFILE) + self.assertEqual([(action, elem.tag) for action, elem in it], [ + ('end', '{namespace}element'), + ('end', '{namespace}element'), + ('end', '{namespace}empty-element'), + ('end', '{namespace}root'), + ]) + it.close() + + def test_external_file(self): + with open(SIMPLE_XMLFILE, 'rb') as source: + it = ET.iterparse(source) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + self.assertEqual([(action, elem.tag) for action, elem in it], [ + ('end', 'element'), + ('end', 'empty-element'), + ('end', 'root'), + ]) + self.assertEqual(it.root.tag, 'root') + + def test_events(self): + iterparse = ET.iterparse + + events = () + it = iterparse(SIMPLE_XMLFILE, events) + self.assertEqual([(action, elem.tag) for action, elem in it], []) + it.close() + + events = () + it = iterparse(SIMPLE_XMLFILE, events=events) + self.assertEqual([(action, elem.tag) for action, elem in it], []) + it.close() + + events = ("start", "end") + it = iterparse(SIMPLE_XMLFILE, events) + self.assertEqual([(action, elem.tag) for action, elem in it], [ + ('start', 'root'), + ('start', 'element'), + ('end', 'element'), + ('start', 'element'), + ('end', 'element'), + ('start', 'empty-element'), + ('end', 'empty-element'), + ('end', 'root'), + ]) + it.close() + + def test_namespace_events(self): + iterparse = ET.iterparse + + events = ("start", "end", "start-ns", "end-ns") + it = iterparse(SIMPLE_NS_XMLFILE, events) + self.assertEqual([(action, elem.tag) if action in ("start", "end") + else (action, elem) + for action, elem in it], [ + ('start-ns', ('', 'namespace')), + ('start', '{namespace}root'), + ('start', '{namespace}element'), + ('end', '{namespace}element'), + ('start', '{namespace}element'), + ('end', '{namespace}element'), + ('start', '{namespace}empty-element'), + ('end', '{namespace}empty-element'), + ('end', '{namespace}root'), + ('end-ns', None), + ]) + it.close() + + events = ('start-ns', 'end-ns') + it = iterparse(io.BytesIO(br""), events) + res = [action for action, elem in it] + self.assertEqual(res, ['start-ns', 'end-ns']) + it.close() + + def test_unknown_events(self): + iterparse = ET.iterparse + + events = ("start", "end", "bogus") + with open(SIMPLE_XMLFILE, "rb") as f: + with self.assertRaises(ValueError) as cm: + iterparse(f, events) + self.assertFalse(f.closed) + self.assertEqual(str(cm.exception), "unknown event 'bogus'") + + with warnings_helper.check_no_resource_warning(self): + with self.assertRaises(ValueError) as cm: + iterparse(SIMPLE_XMLFILE, events) + self.assertEqual(str(cm.exception), "unknown event 'bogus'") + del cm + gc_collect() + + def test_non_utf8(self): + source = io.BytesIO( + b"\n" + b"text\n") + events = ("start-ns",) + it = ET.iterparse(source, events) + self.assertEqual([(action, elem) for action, elem in it], [ + ('start-ns', ('', 'http://\xe9ffbot.org/ns')), + ('start-ns', ('cl\xe9', 'http://effbot.org/ns')), + ]) + + def test_parsing_error(self): + source = io.BytesIO(b"junk") + it = ET.iterparse(source) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'document')) + with self.assertRaises(ET.ParseError) as cm: + next(it) + self.assertEqual(str(cm.exception), + 'junk after document element: line 1, column 12') + + def test_nonexistent_file(self): + with self.assertRaises(FileNotFoundError): + ET.iterparse("nonexistent") + + def test_resource_warnings_not_exhausted(self): + # Not exhausting the iterator still closes the underlying file (bpo-43292) + it = ET.iterparse(SIMPLE_XMLFILE) + with warnings_helper.check_no_resource_warning(self): + del it + gc_collect() + + it = ET.iterparse(SIMPLE_XMLFILE) + with warnings_helper.check_no_resource_warning(self): + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + del it, elem + gc_collect() + + def test_resource_warnings_failed_iteration(self): + self.addCleanup(os_helper.unlink, TESTFN) + with open(TESTFN, "wb") as f: + f.write(b"junk") + + it = ET.iterparse(TESTFN) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'document')) + with warnings_helper.check_no_resource_warning(self): + with self.assertRaises(ET.ParseError) as cm: + next(it) + self.assertEqual(str(cm.exception), + 'junk after document element: line 1, column 12') + del cm, it + gc_collect() + + def test_resource_warnings_exhausted(self): + it = ET.iterparse(SIMPLE_XMLFILE) + with warnings_helper.check_no_resource_warning(self): + list(it) + del it + gc_collect() + + def test_close_not_exhausted(self): + iterparse = ET.iterparse + + it = iterparse(SIMPLE_XMLFILE) + it.close() + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + with open(SIMPLE_XMLFILE, 'rb') as source: + it = iterparse(source) + it.close() + self.assertFalse(source.closed) + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + it = iterparse(SIMPLE_XMLFILE) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + it.close() + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + with open(SIMPLE_XMLFILE, 'rb') as source: + it = iterparse(source) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + it.close() + self.assertFalse(source.closed) + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + def test_close_exhausted(self): + iterparse = ET.iterparse + it = iterparse(SIMPLE_XMLFILE) + list(it) + it.close() + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + with open(SIMPLE_XMLFILE, 'rb') as source: + it = iterparse(source) + list(it) + it.close() + self.assertFalse(source.closed) + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + class XMLPullParserTest(unittest.TestCase): @@ -1540,7 +1571,7 @@ class XMLPullParserTest(unittest.TestCase): self.assertEqual([(action, elem.tag) for action, elem in events], expected) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_simple_xml(self, chunk_size=None, flush=False): parser = ET.XMLPullParser() self.assert_event_tags(parser, []) @@ -1562,19 +1593,19 @@ class XMLPullParserTest(unittest.TestCase): self.assert_event_tags(parser, [('end', 'root')]) self.assertIsNone(parser.close()) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_simple_xml_chunk_1(self): self.test_simple_xml(chunk_size=1, flush=True) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_simple_xml_chunk_5(self): self.test_simple_xml(chunk_size=5, flush=True) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_simple_xml_chunk_22(self): self.test_simple_xml(chunk_size=22) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_feed_while_iterating(self): parser = ET.XMLPullParser() it = parser.read_events() @@ -1587,7 +1618,7 @@ class XMLPullParserTest(unittest.TestCase): with self.assertRaises(StopIteration): next(it) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_simple_xml_with_ns(self): parser = ET.XMLPullParser() self.assert_event_tags(parser, []) @@ -1609,7 +1640,7 @@ class XMLPullParserTest(unittest.TestCase): self.assert_event_tags(parser, [('end', '{namespace}root')]) self.assertIsNone(parser.close()) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_ns_events(self): parser = ET.XMLPullParser(events=('start-ns', 'end-ns')) self._feed(parser, "\n") @@ -1625,7 +1656,7 @@ class XMLPullParserTest(unittest.TestCase): self.assertEqual(list(parser.read_events()), [('end-ns', None)]) self.assertIsNone(parser.close()) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_ns_events_start(self): parser = ET.XMLPullParser(events=('start-ns', 'start', 'end')) self._feed(parser, "\n") @@ -1649,7 +1680,7 @@ class XMLPullParserTest(unittest.TestCase): ('end', '{abc}tag'), ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_ns_events_start_end(self): parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns')) self._feed(parser, "\n") @@ -1677,7 +1708,7 @@ class XMLPullParserTest(unittest.TestCase): ('end-ns', None), ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_events(self): parser = ET.XMLPullParser(events=()) self._feed(parser, "\n") @@ -1724,7 +1755,7 @@ class XMLPullParserTest(unittest.TestCase): self._feed(parser, "") self.assertIsNone(parser.close()) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_events_comment(self): parser = ET.XMLPullParser(events=('start', 'comment', 'end')) self._feed(parser, "\n") @@ -1744,7 +1775,7 @@ class XMLPullParserTest(unittest.TestCase): self._feed(parser, "\n") self.assert_events(parser, [('comment', (ET.Comment, ' text here '))]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_events_pi(self): parser = ET.XMLPullParser(events=('start', 'pi', 'end')) self._feed(parser, "\n") @@ -1775,8 +1806,10 @@ class XMLPullParserTest(unittest.TestCase): def test_unknown_event(self): with self.assertRaises(ValueError): ET.XMLPullParser(events=('start', 'end', 'bogus')) + with self.assertRaisesRegex(ValueError, "unknown event 'bogus'"): + ET.XMLPullParser(events=(x.decode() for x in (b'start', b'end', b'bogus'))) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON @unittest.skipIf(pyexpat.version_info < (2, 6, 0), f'Expat {pyexpat.version_info} does not ' 'support reparse deferral') @@ -1801,7 +1834,7 @@ class XMLPullParserTest(unittest.TestCase): self.assert_event_tags(parser, [('end', 'doc')]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_flush_reparse_deferral_disabled(self): parser = ET.XMLPullParser(events=('start', 'end')) @@ -1984,7 +2017,7 @@ class XIncludeTest(unittest.TestCase): else: return None - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_xinclude_default(self): from xml.etree import ElementInclude doc = self.xinclude_loader('default.xml') @@ -1999,7 +2032,7 @@ class XIncludeTest(unittest.TestCase): '\n' '') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_xinclude(self): from xml.etree import ElementInclude @@ -2064,7 +2097,7 @@ class XIncludeTest(unittest.TestCase): ' \n' '') # C5 - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_xinclude_repeated(self): from xml.etree import ElementInclude @@ -2072,7 +2105,7 @@ class XIncludeTest(unittest.TestCase): ElementInclude.include(document, self.xinclude_loader) self.assertEqual(1+4*2, len(document.findall(".//p"))) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_xinclude_failures(self): from xml.etree import ElementInclude @@ -2177,7 +2210,7 @@ class BugsTest(unittest.TestCase): elem.set("123", 123) check(elem) # attribute value - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_bug_xmltoolkit25(self): # typo in ElementTree.findtext @@ -2201,7 +2234,7 @@ class BugsTest(unittest.TestCase): ET.dump(tree) self.assertEqual(stdout.getvalue(), '
\n') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_bug_xmltoolkit39(self): # non-ascii element and attribute names doesn't work @@ -2236,7 +2269,7 @@ class BugsTest(unittest.TestCase): b'') self.assertEqual(serialize(e), '\u8230') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_bug_xmltoolkit55(self): # make sure we're reporting the first error, not the last @@ -2255,7 +2288,7 @@ class BugsTest(unittest.TestCase): self.assertRaises(OSError, ET.parse, ExceptionFile()) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_bug_xmltoolkit62(self): # Don't crash when using custom entities. @@ -2288,7 +2321,7 @@ class BugsTest(unittest.TestCase): xmltoolkit63() self.assertEqual(sys.getrefcount(None), count) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_bug_200708_newline(self): # Preserve newlines in attributes. @@ -2404,7 +2437,7 @@ class BugsTest(unittest.TestCase): b"\n" b'tãg') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_issue6565(self): elem = ET.XML("") self.assertEqual(summarize_list(elem), ['tag']) @@ -2450,7 +2483,7 @@ class BugsTest(unittest.TestCase): self.assertIsInstance(e[0].tail, str) self.assertEqual(e[0].tail, 'changed') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_lost_elem(self): # Issue #25902: Borrowed element can disappear class Tag: @@ -2476,7 +2509,7 @@ class BugsTest(unittest.TestCase): root = ET.XML(xml) self.assertEqual(root.get('b'), text.decode('utf-8')) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_expat224_utf8_bug(self): # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder. # Check that Expat 2.2.4 fixed the bug. @@ -2489,7 +2522,7 @@ class BugsTest(unittest.TestCase): text = b'x' + b'\xc3\xa0' * 1024 self.check_expat224_utf8_bug(text) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_expat224_utf8_bug_file(self): with open(UTF8_BUG_XMLFILE, 'rb') as fp: raw = fp.read() @@ -2639,7 +2672,7 @@ class BasicElementTest(ElementTestCase, unittest.TestCase): e[:] = [E('bar')] self.assertRaises(TypeError, copy.deepcopy, e) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_cyclic_gc(self): class Dummy: pass @@ -2977,32 +3010,72 @@ class BadElementTest(ElementTestCase, unittest.TestCase): elem = b.close() self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL') - def test_subscr(self): - # Issue #27863 + def test_subscr_with_clear(self): + # See https://github.com/python/cpython/issues/143200. + self.do_test_subscr_with_mutating_slice(use_clear_method=True) + + def test_subscr_with_delete(self): + # See https://github.com/python/cpython/issues/72050. + self.do_test_subscr_with_mutating_slice(use_clear_method=False) + + def do_test_subscr_with_mutating_slice(self, *, use_clear_method): class X: + def __init__(self, i=0): + self.i = i def __index__(self): - del e[:] - return 1 + if use_clear_method: + e.clear() + else: + del e[:] + return self.i - e = ET.Element('elem') - e.append(ET.Element('child')) - e[:X()] # shouldn't crash + for s in self.get_mutating_slices(X, 10): + with self.subTest(s): + e = ET.Element('elem') + e.extend([ET.Element(f'c{i}') for i in range(10)]) + e[s] # shouldn't crash - e.append(ET.Element('child')) - e[0:10:X()] # shouldn't crash + def test_ass_subscr_with_mutating_slice(self): + # See https://github.com/python/cpython/issues/72050 + # and https://github.com/python/cpython/issues/143200. - def test_ass_subscr(self): - # Issue #27863 class X: + def __init__(self, i=0): + self.i = i def __index__(self): e[:] = [] - return 1 + return self.i + + for s in self.get_mutating_slices(X, 10): + with self.subTest(s): + e = ET.Element('elem') + e.extend([ET.Element(f'c{i}') for i in range(10)]) + e[s] = [] # shouldn't crash + + def get_mutating_slices(self, index_class, n_children): + self.assertGreaterEqual(n_children, 10) + return [ + slice(index_class(), None, None), + slice(index_class(2), None, None), + slice(None, index_class(), None), + slice(None, index_class(2), None), + slice(0, 2, index_class(1)), + slice(0, 2, index_class(2)), + slice(0, n_children, index_class(1)), + slice(0, n_children, index_class(2)), + slice(0, 2 * n_children, index_class(1)), + slice(0, 2 * n_children, index_class(2)), + ] + + def test_ass_subscr_with_mutating_iterable_value(self): + class V: + def __iter__(self): + e.clear() + return iter([ET.Element('a'), ET.Element('b')]) e = ET.Element('elem') - for _ in range(10): - e.insert(0, ET.Element('child')) - - e[0:10:X()] = [] # shouldn't crash + e.extend([ET.Element(f'c{i}') for i in range(10)]) + e[:] = V() def test_treebuilder_start(self): # Issue #27863 @@ -3235,7 +3308,7 @@ class ElementTreeTypeTest(unittest.TestCase): class ElementFindTest(unittest.TestCase): - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_find_simple(self): e = ET.XML(SAMPLE_XML) self.assertEqual(e.find('tag').tag, 'tag') @@ -3259,7 +3332,7 @@ class ElementFindTest(unittest.TestCase): # Issue #16922 self.assertEqual(ET.XML('').findtext('empty'), '') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_find_xpath(self): LINEAR_XML = ''' @@ -3282,7 +3355,7 @@ class ElementFindTest(unittest.TestCase): self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()-0]') self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_findall(self): e = ET.XML(SAMPLE_XML) e[2] = ET.XML(SAMPLE_SECTION) @@ -3471,7 +3544,7 @@ class ElementFindTest(unittest.TestCase): with self.assertRaisesRegex(SyntaxError, 'cannot use absolute path'): e.findall('/tag') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_find_through_ElementTree(self): e = ET.XML(SAMPLE_XML) self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag') @@ -3674,7 +3747,7 @@ class TreeBuilderTest(unittest.TestCase): a = parser.close() self.assertEqual(a.text, "texttail") - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_late_tail_mix_pi_comments(self): # Issue #37399: The tail of an ignored comment could overwrite the text before it. # Test appending tails to comments/pis. @@ -3787,7 +3860,7 @@ class TreeBuilderTest(unittest.TestCase): pass self._check_element_factory_class(MyElement) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_doctype(self): class DoctypeParser: _doctype = None @@ -3865,7 +3938,7 @@ class XMLParserTest(unittest.TestCase): parser.feed(self.sample2) parser.close() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_subclass_doctype(self): _doctype = None class MyParserWithDoctype(ET.XMLParser): @@ -3907,7 +3980,7 @@ class XMLParserTest(unittest.TestCase): parser.feed(self.sample2) parser.close() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_parse_string(self): parser = ET.XMLParser(target=ET.TreeBuilder()) parser.feed(self.sample3) @@ -4365,13 +4438,13 @@ class ParseErrorTest(unittest.TestCase): except ET.ParseError as e: return e - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_error_position(self): self.assertEqual(self._get_error('foo').position, (1, 0)) self.assertEqual(self._get_error('&foo;').position, (1, 5)) self.assertEqual(self._get_error('foobar<').position, (1, 6)) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_error_code(self): import xml.parsers.expat.errors as ERRORS self.assertEqual(self._get_error('foo').code, @@ -4431,7 +4504,7 @@ class NoAcceleratorTest(unittest.TestCase): # -------------------------------------------------------------------- class BoolTest(unittest.TestCase): - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_warning(self): e = ET.fromstring('
') msg = ( @@ -4461,7 +4534,7 @@ class C14NTest(unittest.TestCase): # # simple roundtrip tests (from c14n.py) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_simple_roundtrip(self): # Basics self.assertEqual(c14n_roundtrip(""), '') @@ -4502,7 +4575,7 @@ class C14NTest(unittest.TestCase): xml = '' self.assertEqual(c14n_roundtrip(xml), xml) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_c14n_exclusion(self): xml = textwrap.dedent("""\ @@ -4583,7 +4656,7 @@ class C14NTest(unittest.TestCase): # note that this uses generated C14N versions of the standard ET.write # output, not roundtripped C14N (see above). - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_xml_c14n2(self): datadir = findfile("c14n-20", subdir="xmltestdata") full_path = partial(os.path.join, datadir) diff --git a/Lib/xml/dom/__init__.py b/Lib/xml/dom/__init__.py index 97cf9a642..dd7fb996a 100644 --- a/Lib/xml/dom/__init__.py +++ b/Lib/xml/dom/__init__.py @@ -137,4 +137,4 @@ XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml" EMPTY_NAMESPACE = None EMPTY_PREFIX = None -from .domreg import getDOMImplementation, registerDOMImplementation +from .domreg import getDOMImplementation, registerDOMImplementation # noqa: F401 diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index db51f350e..16b33b901 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -292,13 +292,6 @@ def _append_child(self, node): childNodes.append(node) node.parentNode = self -def _in_document(node): - # return True iff node is part of a document tree - while node is not None: - if node.nodeType == Node.DOCUMENT_NODE: - return True - node = node.parentNode - return False def _write_data(writer, text, attr): "Writes datachars to writer." @@ -371,6 +364,7 @@ class Attr(Node): def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, prefix=None): self.ownerElement = None + self.ownerDocument = None self._name = qName self.namespaceURI = namespaceURI self._prefix = prefix @@ -696,6 +690,7 @@ class Element(Node): def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None, localName=None): + self.ownerDocument = None self.parentNode = None self.tagName = self.nodeName = tagName self.prefix = prefix @@ -1555,7 +1550,7 @@ def _clear_id_cache(node): if node.nodeType == Node.DOCUMENT_NODE: node._id_cache.clear() node._id_search_stack = None - elif _in_document(node): + elif node.ownerDocument: node.ownerDocument._id_cache.clear() node.ownerDocument._id_search_stack= None diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 9bb09ab54..dafe5b1b8 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -267,7 +267,11 @@ class Element: """ # assert iselement(element) - self._children.remove(subelement) + try: + self._children.remove(subelement) + except ValueError: + # to align the error message with the C implementation + raise ValueError("Element.remove(x): element not found") from None def find(self, path, namespaces=None): """Find first matching element by tag name or path. diff --git a/Lib/xml/sax/__init__.py b/Lib/xml/sax/__init__.py index b65731020..fe4582c6f 100644 --- a/Lib/xml/sax/__init__.py +++ b/Lib/xml/sax/__init__.py @@ -21,9 +21,9 @@ expatreader -- Driver that allows use of the Expat parser with SAX. from .xmlreader import InputSource from .handler import ContentHandler, ErrorHandler -from ._exceptions import SAXException, SAXNotRecognizedException, \ - SAXParseException, SAXNotSupportedException, \ - SAXReaderNotAvailable +from ._exceptions import (SAXException, SAXNotRecognizedException, + SAXParseException, SAXNotSupportedException, + SAXReaderNotAvailable) def parse(source, handler, errorHandler=ErrorHandler()): @@ -55,7 +55,7 @@ default_parser_list = ["xml.sax.expatreader"] # tell modulefinder that importing sax potentially imports expatreader _false = 0 if _false: - import xml.sax.expatreader + import xml.sax.expatreader # noqa: F401 import os, sys if not sys.flags.ignore_environment and "PY_SAX_PARSER" in os.environ: @@ -92,3 +92,9 @@ def make_parser(parser_list=()): def _create_parser(parser_name): drv_module = __import__(parser_name,{},{},['create_parser']) return drv_module.create_parser() + + +__all__ = ['ContentHandler', 'ErrorHandler', 'InputSource', 'SAXException', + 'SAXNotRecognizedException', 'SAXNotSupportedException', + 'SAXParseException', 'SAXReaderNotAvailable', + 'default_parser_list', 'make_parser', 'parse', 'parseString'] diff --git a/Lib/xml/sax/handler.py b/Lib/xml/sax/handler.py index e8d417e51..3183c3fe9 100644 --- a/Lib/xml/sax/handler.py +++ b/Lib/xml/sax/handler.py @@ -371,7 +371,7 @@ class LexicalHandler: name is the name of the document element type, public_id the public identifier of the DTD (or None if none were supplied) - and system_id the system identfier of the external subset (or + and system_id the system identifier of the external subset (or None if none were supplied).""" def endDTD(self):