diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py
new file mode 100644
index 000000000..46249e513
--- /dev/null
+++ b/Lib/test/test_minidom.py
@@ -0,0 +1,1788 @@
+# test for xml.dom.minidom
+
+import copy
+import pickle
+import io
+from test import support
+import unittest
+
+import xml.dom.minidom
+
+from xml.dom.minidom import parse, Attr, Node, Document, Element, parseString
+from xml.dom.minidom import getDOMImplementation
+from xml.parsers.expat import ExpatError
+
+
+# Path of the shared XML fixture used by the parse()-based tests.
+tstfile = support.findfile("test.xml", subdir="xmltestdata")
+# Small document with a DOCTYPE (internal subset), text, a PI, a comment
+# and an empty element.
+sample = ("<?xml version='1.0' encoding='us-ascii'?>\n"
+          "<!DOCTYPE doc PUBLIC 'http://xml.python.org/public'"
+          " 'http://xml.python.org/system' [\n"
+          "  <!ELEMENT e EMPTY>\n"
+          "  <!ENTITY ent SYSTEM 'http://xml.python.org/entity'>\n"
+          "]><doc attr='value'> text\n"
+          "<?pi sample?> <!-- comment --> <e/> </doc>")
+
+# DocumentType import tests build their documents through these helpers
+# because not every DOM builder creates DocumentType nodes itself.
+def create_doc_without_doctype(doctype=None):
+    """Return a new Document whose root element is named "doc".
+
+    *doctype* is passed straight to createDocument(); the default of None
+    yields a document without a DocumentType node.
+    """
+    implementation = getDOMImplementation()
+    return implementation.createDocument(None, "doc", doctype)
+
+def create_nonempty_doctype():
+    """Return a detached DocumentType holding one notation and one entity."""
+    implementation = getDOMImplementation()
+    doctype = implementation.createDocumentType("doc", None, None)
+
+    my_notation = xml.dom.minidom.Notation(
+        "my-notation", None, "http://xml.python.org/notations/my")
+    my_entity = xml.dom.minidom.Entity(
+        "my-entity", None, "http://xml.python.org/entities/my", "my-notation")
+    my_entity.version = "1.0"
+    my_entity.encoding = "utf-8"
+    my_entity.actualEncoding = "us-ascii"
+
+    # Replace the backing sequences with fresh lists holding the new nodes.
+    doctype.entities._seq = [my_entity]
+    doctype.notations._seq = [my_notation]
+    return doctype
+
+def create_doc_with_doctype():
+    """Return a "doc" Document that owns a populated DocumentType."""
+    populated = create_nonempty_doctype()
+    document = create_doc_without_doctype(populated)
+    # The entity and the notation were created detached; hand them to the
+    # new document.
+    for nodemap in (populated.entities, populated.notations):
+        nodemap.item(0).ownerDocument = document
+    return document
+
+class MinidomTest(unittest.TestCase):
+    def confirm(self, test, testname = "Test"):
+        """Fail with *testname* as the message unless *test* is truthy."""
+        self.assertTrue(test, msg=testname)
+
+    def checkWholeText(self, node, s):
+        """Assert that ``node.wholeText`` equals the expected string *s*."""
+        found = node.wholeText
+        self.assertEqual(found, s, "looking for %r, found %r" % (s, found))
+
+    def testDocumentAsyncAttr(self):
+        """async_ is false on a Document instance and on the class itself."""
+        for holder in (Document(), Document):
+            self.assertFalse(holder.async_)
+
+    def testParseFromBinaryFile(self):
+        """parse() accepts a file object opened in binary mode."""
+        with open(tstfile, 'rb') as stream:
+            document = parse(stream)
+            document.unlink()
+            self.assertIsInstance(document, Document)
+
+    def testParseFromTextFile(self):
+        """parse() accepts a file object opened in text mode."""
+        with open(tstfile, 'r', encoding='iso-8859-1') as stream:
+            document = parse(stream)
+            document.unlink()
+            self.assertIsInstance(document, Document)
+
+    def testAttrModeSetsParamsAsAttrs(self):
+        """Attr() stores every constructor argument on the matching field."""
+        node = Attr("qName", "namespaceURI", "localName", "prefix")
+        expected = (
+            ("name", "qName"),
+            ("namespaceURI", "namespaceURI"),
+            ("prefix", "prefix"),
+            ("localName", "localName"),
+        )
+        for field, value in expected:
+            self.assertEqual(getattr(node, field), value)
+
+    def testAttrModeSetsNonOptionalAttrs(self):
+        """With localName=None, Attr() reports the qualified name instead."""
+        node = Attr("qName", "namespaceURI", None, "prefix")
+        for field, value in (("name", "qName"),
+                             ("namespaceURI", "namespaceURI"),
+                             ("prefix", "prefix")):
+            self.assertEqual(getattr(node, field), value)
+        self.assertEqual(node.localName, node.name)
+
+    def testGetElementsByTagName(self):
+        """Searching from the document or from its root finds the same LIs."""
+        document = parse(tstfile)
+        from_document = document.getElementsByTagName("LI")
+        from_root = document.documentElement.getElementsByTagName("LI")
+        self.assertEqual(from_document, from_root)
+        document.unlink()
+
+    def testInsertBefore(self):
+        """insertBefore() puts a node before a reference child or at the end."""
+        dom = parseString("<doc><foo/></doc>")
+        root = dom.documentElement
+        elem = root.childNodes[0]
+
+        def check_children(*expected):
+            # len(), .length, indexing and item() must all agree.
+            self.assertEqual(len(root.childNodes), len(expected))
+            self.assertEqual(root.childNodes.length, len(expected))
+            for index, node in enumerate(expected):
+                self.assertIs(root.childNodes[index], node)
+                self.assertIs(root.childNodes.item(index), node)
+            self.assertIs(root.firstChild, expected[0])
+            self.assertIs(root.lastChild, expected[-1])
+
+        # Insert before the only existing child.
+        first = dom.createElement("element")
+        root.insertBefore(first, elem)
+        check_children(first, elem)
+        self.assertEqual(root.toxml(), "<doc><element/><foo/></doc>")
+
+        # A reference node of None appends at the end.
+        nelem = dom.createElement("element")
+        root.insertBefore(nelem, None)
+        check_children(first, elem, nelem)
+        self.assertIs(nelem.previousSibling, elem)
+        self.assertEqual(root.toxml(), "<doc><element/><foo/><element/></doc>")
+
+        # Insert between two existing children.
+        nelem2 = dom.createElement("bar")
+        root.insertBefore(nelem2, nelem)
+        check_children(first, elem, nelem2, nelem)
+        self.assertIs(nelem2.nextSibling, nelem)
+        self.assertIs(nelem.previousSibling, nelem2)
+        self.assertEqual(root.toxml(),
+                         "<doc><element/><foo/><bar/><element/></doc>")
+        dom.unlink()
+
+    def _create_fragment_test_nodes(self):
+        """Build a document with one text child plus a detached fragment.
+
+        Returns ``(dom, orig, c1, c2, c3, frag)``: *orig* is already a child
+        of the document element; *c1*..*c3* are the children of *frag*.
+        """
+        dom = parseString("<doc/>")
+        orig = dom.createTextNode("original")
+        dom.documentElement.appendChild(orig)
+        frag = dom.createDocumentFragment()
+        children = [dom.createTextNode(text) for text in ("foo", "bar", "bat")]
+        for child in children:
+            frag.appendChild(child)
+        c1, c2, c3 = children
+        return dom, orig, c1, c2, c3, frag
+
+    def testInsertBeforeFragment(self):
+        """Inserting a fragment moves its children in, keeping their order."""
+        # (insert before orig?, failure message)
+        cases = (
+            (False, "insertBefore(<fragment>, None)"),
+            (True, "insertBefore(<fragment>, orig)"),
+        )
+        for before_orig, message in cases:
+            dom, orig, c1, c2, c3, frag = self._create_fragment_test_nodes()
+            root = dom.documentElement
+            if before_orig:
+                root.insertBefore(frag, orig)
+                expected = (c1, c2, c3, orig)
+            else:
+                root.insertBefore(frag, None)
+                expected = (orig, c1, c2, c3)
+            self.assertTupleEqual(tuple(root.childNodes), expected, message)
+            frag.unlink()
+            dom.unlink()
+
+    def testAppendChild(self):
+        """A comment appended to the root becomes its last child."""
+        document = parse(tstfile)
+        root = document.documentElement
+        root.appendChild(document.createComment("Hello"))
+        last = root.childNodes[-1]
+        self.assertEqual(last.nodeName, "#comment")
+        self.assertEqual(last.data, "Hello")
+        document.unlink()
+
+ @support.requires_resource('cpu')
+ def testAppendChildNoQuadraticComplexity(self):
+ # Compares the work done for three chain lengths; the work is measured
+ # as the number of Element.__getattribute__ calls, not wall-clock time.
+ impl = getDOMImplementation()
+
+ def work(n):
+ # Build a chain of n nested "child" elements under a fresh root and
+ # return how many Element attribute lookups that took.
+ doc = impl.createDocument(None, "some_tag", None)
+ element = doc.documentElement
+ total_calls = 0
+
+ # Count attribute accesses as a proxy for work done
+ def getattribute_counter(self, attr):
+ nonlocal total_calls
+ total_calls += 1
+ return object.__getattribute__(self, attr)
+
+ with support.swap_attr(Element, "__getattribute__", getattribute_counter):
+ for _ in range(n):
+ child = doc.createElement("child")
+ element.appendChild(child)
+ # Descend, so each new element is appended one level deeper.
+ element = child
+ return total_calls
+
+ # Doubling N should not ~quadruple the work.
+ w1 = work(1024)
+ w2 = work(2048)
+ w3 = work(4096)
+
+ # Guard the divisions below against a zero count.
+ self.assertGreater(w1, 0)
+ r1 = w2 / w1
+ r2 = w3 / w2
+ self.assertLess(
+ max(r1, r2), 3.2,
+ msg=f"Possible quadratic behavior: work={w1,w2,w3} ratios={r1,r2}"
+ )
+
+    def testSetAttributeNodeWithoutOwnerDocument(self):
+        """Attach an Attr to an Element built directly from the classes."""
+        # regression test for gh-142754
+        element = Element("test")
+        id_attr = Attr("id")
+        id_attr.value = "test-id"
+        element.setAttributeNode(id_attr)
+        self.assertEqual(element.getAttribute("id"), "test-id")
+
+    def testAppendChildFragment(self):
+        """Appending a fragment appends its children after existing ones."""
+        dom, orig, c1, c2, c3, frag = self._create_fragment_test_nodes()
+        root = dom.documentElement
+        root.appendChild(frag)
+        self.assertTupleEqual(tuple(root.childNodes),
+                              (orig, c1, c2, c3),
+                              "appendChild(<fragment>)")
+        frag.unlink()
+        dom.unlink()
+
+    def testReplaceChildFragment(self):
+        """Replacing a child with a fragment substitutes its children."""
+        dom, orig, c1, c2, c3, frag = self._create_fragment_test_nodes()
+        root = dom.documentElement
+        root.replaceChild(frag, orig)
+        orig.unlink()
+        self.assertTupleEqual(tuple(root.childNodes), (c1, c2, c3),
+                              "replaceChild(<fragment>)")
+        frag.unlink()
+        dom.unlink()
+
+    def testLegalChildren(self):
+        """Text nodes are refused as document children and as attributes."""
+        document = Document()
+        element = document.createElement('element')
+        text_node = document.createTextNode('text')
+        hierarchy_error = xml.dom.HierarchyRequestErr
+
+        with self.assertRaises(hierarchy_error):
+            document.appendChild(text_node)
+
+        document.appendChild(element)
+        for method in (document.insertBefore, document.replaceChild):
+            with self.assertRaises(hierarchy_error):
+                method(text_node, element)
+
+        attribute_map = element.attributes
+        for method in (attribute_map.setNamedItem,
+                       attribute_map.setNamedItemNS):
+            with self.assertRaises(hierarchy_error):
+                method(text_node)
+
+        # An element, unlike the document, accepts the text node.
+        element.appendChild(text_node)
+        document.unlink()
+
+    def testNamedNodeMapSetItem(self):
+        """Assigning through ``attributes[name]`` creates a wired-up Attr."""
+        document = Document()
+        element = document.createElement('element')
+        attribute_map = element.attributes
+        attribute_map["foo"] = "bar"
+        created = attribute_map.item(0)
+        self.assertIs(created.ownerDocument, document,
+                      "NamedNodeMap.__setitem__() sets ownerDocument")
+        self.assertIs(created.ownerElement, element,
+                      "NamedNodeMap.__setitem__() sets ownerElement")
+        self.assertEqual(created.value, "bar",
+                         "NamedNodeMap.__setitem__() sets value")
+        self.assertEqual(created.nodeValue, "bar",
+                         "NamedNodeMap.__setitem__() sets nodeValue")
+        element.unlink()
+        document.unlink()
+
+    def testNonZero(self):
+        """A parsed document is truthy; a comment has no child nodes."""
+        document = parse(tstfile)
+        self.assertTrue(document) # should not be zero
+        document.appendChild(document.createComment("foo"))
+        appended = document.childNodes[-1]
+        self.assertFalse(appended.childNodes)
+        document.unlink()
+
+    def testUnlink(self):
+        """unlink() leaves the document without child nodes."""
+        document = parse(tstfile)
+        self.assertTrue(document.childNodes)
+        document.unlink()
+        self.assertFalse(document.childNodes)
+
+    def testContext(self):
+        """Leaving the ``with`` block empties the document's child list."""
+        with parse(tstfile) as document:
+            self.assertTrue(document.childNodes)
+        self.assertFalse(document.childNodes)
+
+    def testElement(self):
+        """An element appended to a Document becomes its documentElement."""
+        document = Document()
+        root = document.createElement("abc")
+        document.appendChild(root)
+        self.assertTrue(document.documentElement)
+        document.unlink()
+
+    def testAAA(self):
+        """setAttribute() serializes the value and sets both owners."""
+        dom = parseString("<abc/>")
+        el = dom.documentElement
+        el.setAttribute("spam", "jam2")
+        self.assertEqual(el.toxml(), '<abc spam="jam2"/>', "testAAA")
+        a = el.getAttributeNode("spam")
+        self.assertIs(a.ownerDocument, dom,
+                      "setAttribute() sets ownerDocument")
+        self.assertIs(a.ownerElement, dom.documentElement,
+                      "setAttribute() sets ownerElement")
+        dom.unlink()
+
+    def testAAB(self):
+        """Setting the same attribute twice keeps only the last value."""
+        dom = parseString("<abc/>")
+        el = dom.documentElement
+        el.setAttribute("spam", "jam")
+        el.setAttribute("spam", "jam2")
+        self.assertEqual(el.toxml(), '<abc spam="jam2"/>', "testAAB")
+        dom.unlink()
+
+    def testAddAttr(self):
+        """setAttribute() adds new attributes and overwrites existing ones."""
+        document = Document()
+        element = document.appendChild(document.createElement("abc"))
+
+        def set_and_check(name, value):
+            element.setAttribute(name, value)
+            self.assertEqual(element.getAttribute(name), value)
+            self.assertEqual(element.attributes[name].value, value)
+
+        set_and_check("def", "ghi")
+        set_and_check("jkl", "mno")
+        self.assertEqual(len(element.attributes), 2)
+
+        # Re-setting an existing name must not add a second attribute.
+        set_and_check("def", "newval")
+        self.assertEqual(len(element.attributes), 2)
+        document.unlink()
+
+    def testDeleteAttr(self):
+        """``del attributes[name]`` removes the attribute."""
+        document = Document()
+        element = document.appendChild(document.createElement("abc"))
+        attribute_map = element.attributes
+
+        self.assertEqual(len(element.attributes), 0)
+        element.setAttribute("def", "ghi")
+        self.assertEqual(len(element.attributes), 1)
+        del attribute_map["def"]
+        self.assertEqual(len(element.attributes), 0)
+        document.unlink()
+
+    def testRemoveAttr(self):
+        """removeAttribute() drops a known name and rejects an unknown one."""
+        document = Document()
+        element = document.appendChild(document.createElement("abc"))
+
+        element.setAttribute("def", "ghi")
+        self.assertEqual(len(element.attributes), 1)
+        with self.assertRaises(xml.dom.NotFoundErr):
+            element.removeAttribute("foo")
+        element.removeAttribute("def")
+        self.assertEqual(len(element.attributes), 0)
+        document.unlink()
+
+    def testRemoveAttrNS(self):
+        """removeAttributeNS() removes by namespace URI and local name."""
+        document = Document()
+        python_ns = "http://www.python.org"
+        element = document.appendChild(
+            document.createElementNS(python_ns, "python:abc"))
+        element.setAttributeNS("http://www.w3.org", "xmlns:python", python_ns)
+        element.setAttributeNS(python_ns, "python:abcattr", "foo")
+        with self.assertRaises(xml.dom.NotFoundErr):
+            element.removeAttributeNS("foo", python_ns)
+        self.assertEqual(len(element.attributes), 2)
+        element.removeAttributeNS(python_ns, "abcattr")
+        self.assertEqual(len(element.attributes), 1)
+        document.unlink()
+
+    def testRemoveAttributeNode(self):
+        """removeAttributeNode() returns the node; None raises NotFoundErr."""
+        document = Document()
+        element = document.appendChild(document.createElement("foo"))
+        element.setAttribute("spam", "jam")
+        self.assertEqual(len(element.attributes), 1)
+        spam = element.getAttributeNode("spam")
+        with self.assertRaises(xml.dom.NotFoundErr):
+            element.removeAttributeNode(None)
+        self.assertIs(spam, element.removeAttributeNode(spam))
+        self.assertEqual(len(element.attributes), 0)
+        self.assertIsNone(element.getAttributeNode("spam"))
+
+        # Same check on an element that never had the attribute.
+        other_document = Document()
+        other_element = other_document.appendChild(
+            other_document.createElement("foo"))
+        missing = other_element.getAttributeNode("spam")
+        with self.assertRaises(xml.dom.NotFoundErr):
+            other_element.removeAttributeNode(missing)
+        document.unlink()
+
+    def testHasAttribute(self):
+        """hasAttribute() is true for an attribute that has been set."""
+        document = Document()
+        element = document.createElement("foo")
+        element = document.appendChild(element)
+        element.setAttribute("spam", "jam")
+        self.assertTrue(element.hasAttribute("spam"))
+
+    def testChangeAttr(self):
+        """Changing a value keeps the attribute count and its ID flag."""
+        dom = parseString("<abc/>")
+        el = dom.documentElement
+
+        def check(name, value):
+            # All three views of the attribute must report the same value.
+            self.assertEqual(el.attributes[name].value, value)
+            self.assertEqual(el.attributes[name].nodeValue, value)
+            self.assertEqual(el.getAttribute(name), value)
+
+        el.setAttribute("spam", "jam")
+        self.assertEqual(len(el.attributes), 1)
+        el.setAttribute("spam", "bam")
+        # Set this attribute to be an ID and make sure that doesn't change
+        # when changing the value:
+        el.setIdAttribute("spam")
+        self.assertEqual(len(el.attributes), 1)
+        check("spam", "bam")
+        self.assertTrue(el.getAttributeNode("spam").isId)
+
+        el.attributes["spam"] = "ham"
+        self.assertEqual(len(el.attributes), 1)
+        check("spam", "ham")
+        self.assertTrue(el.attributes["spam"].isId)
+
+        el.setAttribute("spam2", "bam")
+        self.assertEqual(len(el.attributes), 2)
+        check("spam", "ham")
+        check("spam2", "bam")
+
+        el.attributes["spam2"] = "bam2"
+        self.assertEqual(len(el.attributes), 2)
+        check("spam", "ham")
+        check("spam2", "bam2")
+        dom.unlink()
+
+    def testGetAttrList(self):
+        """attributes.items() yields a (name, value) pair per attribute."""
+        dom = parseString("<abc/>")
+        self.addCleanup(dom.unlink)
+        el = dom.documentElement
+        el.setAttribute("spam", "jam")
+        self.assertEqual(len(el.attributes.items()), 1)
+        el.setAttribute("foo", "bar")
+        items = el.attributes.items()
+        self.assertEqual(len(items), 2)
+        self.assertIn(('spam', 'jam'), items)
+        self.assertIn(('foo', 'bar'), items)
+
+    def testGetAttrValues(self):
+        """attributes.values() yields the Attr nodes of the element."""
+        dom = parseString("<abc/>")
+        self.addCleanup(dom.unlink)
+        el = dom.documentElement
+        el.setAttribute("spam", "jam")
+        values = [x.value for x in el.attributes.values()]
+        self.assertIn("jam", values)
+        el.setAttribute("foo", "bar")
+        values = [x.value for x in el.attributes.values()]
+        self.assertIn("bar", values)
+        self.assertIn("jam", values)
+
+    def testGetAttribute(self):
+        """getAttribute() returns '' for an attribute that is not set."""
+        document = Document()
+        root = document.createElementNS("http://www.python.org", "python:abc")
+        element = document.appendChild(root)
+        self.assertEqual(element.getAttribute('missing'), '')
+
+    def testGetAttributeNS(self):
+        """getAttributeNS() returns the value, or '' when nothing matches."""
+        document = Document()
+        python_ns = "http://www.python.org"
+        w3_ns = "http://www.w3.org"
+        element = document.appendChild(
+            document.createElementNS(python_ns, "python:abc"))
+        element.setAttributeNS(w3_ns, "xmlns:python", python_ns)
+        self.assertEqual(element.getAttributeNS(w3_ns, "python"),
+                         'http://www.python.org')
+        self.assertEqual(element.getAttributeNS(w3_ns, "other"), '')
+        # An element without any attributes behaves the same way.
+        nested = element.appendChild(document.createElement('abc'))
+        self.assertEqual(nested.getAttributeNS(python_ns, "missing"), '')
+
+ # Empty placeholder: contains no assertions and always passes.
+ def testGetAttributeNode(self): pass
+
+    def testGetElementsByTagNameNS(self):
+        """getElementsByTagNameNS() finds a prefixed element by URI + name."""
+        d = """<foo xmlns:minidom='http://pyxml.sf.net/minidom'>
+        <minidom:myelem/>
+        </foo>"""
+        dom = parseString(d)
+        elems = dom.getElementsByTagNameNS("http://pyxml.sf.net/minidom",
+                                           "myelem")
+        self.assertEqual(len(elems), 1)
+        found = elems[0]
+        self.assertEqual(found.namespaceURI, "http://pyxml.sf.net/minidom")
+        self.assertEqual(found.localName, "myelem")
+        self.assertEqual(found.prefix, "minidom")
+        self.assertEqual(found.tagName, "minidom:myelem")
+        self.assertEqual(found.nodeName, "minidom:myelem")
+        dom.unlink()
+
+    def get_empty_nodelist_from_elements_by_tagName_ns_helper(self, doc, nsuri,
+                                                              lname):
+        """Assert that *doc* has no element matching (*nsuri*, *lname*)."""
+        matches = doc.getElementsByTagNameNS(nsuri, lname)
+        self.assertEqual(len(matches), 0)
+
+    def testGetEmptyNodeListFromElementsByTagNameNS(self):
+        """Non-matching namespace/name pairs give an empty node list."""
+        check = self.get_empty_nodelist_from_elements_by_tagName_ns_helper
+
+        # A document without any namespace declarations.
+        doc = parseString('<doc/>')
+        check(doc, 'http://xml.python.org/namespaces/a', 'localname')
+        check(doc, '*', 'splat')
+        check(doc, 'http://xml.python.org/namespaces/a', '*')
+
+        # A document whose elements live in a default namespace.
+        doc = parseString('<doc xmlns="http://xml.python.org/splat"><e/></doc>')
+        check(doc, "http://xml.python.org/splat", "not-there")
+        check(doc, "*", "not-there")
+        check(doc, "http://somewhere.else.net/not-there", "e")
+
+    def testElementReprAndStr(self):
+        """repr() and str() of an Element give the same text."""
+        document = Document()
+        element = document.appendChild(document.createElement("abc"))
+        as_repr, as_str = repr(element), str(element)
+        self.assertEqual(as_repr, as_str)
+        document.unlink()
+
+    def testElementReprAndStrUnicode(self):
+        """repr() and str() of an Element named "abc" are identical."""
+        document = Document()
+        element = document.appendChild(document.createElement("abc"))
+        as_repr, as_str = repr(element), str(element)
+        self.assertEqual(as_repr, as_str)
+        document.unlink()
+
+    def testElementReprAndStrUnicodeNS(self):
+        """repr()/str() of a namespaced Element show its qualified name."""
+        document = Document()
+        root = document.createElementNS("http://www.slashdot.org", "slash:abc")
+        element = document.appendChild(root)
+        as_repr, as_str = repr(element), str(element)
+        self.assertEqual(as_repr, as_str)
+        self.assertIn("slash:abc", as_repr)
+        document.unlink()
+
+    def testAttributeRepr(self):
+        """repr() and str() of an Attr node give the same text."""
+        dom = Document()
+        el = dom.appendChild(dom.createElement("abc"))
+        # setAttribute() returns None, so fetch the node explicitly;
+        # otherwise the comparison below would only cover None.
+        el.setAttribute("abc", "def")
+        node = el.getAttributeNode("abc")
+        self.assertIsNotNone(node)
+        self.assertEqual(str(node), repr(node))
+        dom.unlink()
+
+    def testWriteXML(self):
+        """toxml() reproduces a simple parsed document exactly."""
+        source = '<?xml version="1.0" ?><a b="c"/>'
+        dom = parseString(source)
+        domstr = dom.toxml()
+        dom.unlink()
+        self.assertEqual(source, domstr)
+
+    def test_toxml_quote_text(self):
+        """Text content escapes &, < and > but keeps quotes and whitespace."""
+        dom = Document()
+        elem = dom.appendChild(dom.createElement('elem'))
+        elem.appendChild(dom.createTextNode('&<>"'))
+        # One child element per whitespace sample: (tag name, text content).
+        for tag, text in (('cr', '\r'), ('crlf', '\r\n'),
+                          ('lflf', '\n\n'), ('ws', '\t\n\r ')):
+            child = elem.appendChild(dom.createElement(tag))
+            child.appendChild(dom.createTextNode(text))
+        domstr = dom.toxml()
+        dom.unlink()
+        self.assertEqual(domstr, '<?xml version="1.0" ?>'
+                         '<elem>&amp;&lt;&gt;"'
+                         '<cr>\r</cr>'
+                         '<crlf>\r\n</crlf>'
+                         '<lflf>\n\n</lflf>'
+                         '<ws>\t\n\r </ws></elem>')
+
+    def test_toxml_quote_attrib(self):
+        """Attribute values escape markup, quotes and whitespace controls."""
+        dom = Document()
+        elem = dom.appendChild(dom.createElement('elem'))
+        elem.setAttribute("a", '&<>"')
+        elem.setAttribute("cr", "\r")
+        elem.setAttribute("lf", "\n")
+        elem.setAttribute("crlf", "\r\n")
+        elem.setAttribute("lflf", "\n\n")
+        elem.setAttribute("ws", "\t\n\r ")
+        domstr = dom.toxml()
+        dom.unlink()
+        self.assertEqual(domstr, '<?xml version="1.0" ?>'
+                         '<elem a="&amp;&lt;&gt;&quot;" '
+                         'cr="&#13;" '
+                         'lf="&#10;" '
+                         'crlf="&#13;&#10;" '
+                         'lflf="&#10;&#10;" '
+                         'ws="&#9;&#10;&#13; "/>')
+
+    def testAltNewline(self):
+        """toprettyxml(newl=...) uses the given line terminator throughout."""
+        source = '<?xml version="1.0" ?>\n<a b="c"/>\n'
+        dom = parseString(source)
+        domstr = dom.toprettyxml(newl="\r\n")
+        dom.unlink()
+        self.assertEqual(domstr, source.replace("\n", "\r\n"))
+
+    def test_toprettyxml_with_text_nodes(self):
+        """A lone text child stays inline; mixed content is indented."""
+        # see issue #4147, text nodes are not indented
+        decl = '<?xml version="1.0" ?>\n'
+        # (source document, expected pretty-printed body)
+        cases = (
+            ('<B>A</B>', '<B>A</B>\n'),
+            ('<C>A<B>A</B></C>', '<C>\n\tA\n\t<B>A</B>\n</C>\n'),
+            ('<C><B>A</B>A</C>', '<C>\n\t<B>A</B>\n\tA\n</C>\n'),
+            ('<C><B>A</B><B>A</B></C>', '<C>\n\t<B>A</B>\n\t<B>A</B>\n</C>\n'),
+            ('<C><B>A</B>A<B>A</B></C>',
+             '<C>\n\t<B>A</B>\n\tA\n\t<B>A</B>\n</C>\n'),
+        )
+        for source, expected in cases:
+            self.assertEqual(parseString(source).toprettyxml(),
+                             decl + expected)
+
+    def test_toprettyxml_with_adjacent_text_nodes(self):
+        """Two adjacent text children are each written on an indented line."""
+        # see issue #4147, adjacent text nodes are indented normally
+        dom = Document()
+        elem = dom.createElement('elem')
+        elem.appendChild(dom.createTextNode('TEXT'))
+        elem.appendChild(dom.createTextNode('TEXT'))
+        dom.appendChild(elem)
+        decl = '<?xml version="1.0" ?>\n'
+        self.assertEqual(dom.toprettyxml(),
+                         decl + '<elem>\n\tTEXT\n\tTEXT\n</elem>\n')
+
+    def test_toprettyxml_preserves_content_of_text_node(self):
+        """Pretty-printing and re-parsing keeps the text inside <B> intact."""
+        # see issue #4147
+        for source in ('<B>A</B>', '<A><B>C</B></A>'):
+            dom = parseString(source)
+            dom2 = parseString(dom.toprettyxml())
+            self.assertEqual(
+                dom.getElementsByTagName('B')[0].childNodes[0].toxml(),
+                dom2.getElementsByTagName('B')[0].childNodes[0].toxml())
+
+    def testProcessingInstruction(self):
+        """A parsed PI exposes target and data and has no children."""
+        dom = parseString('<e><?mypi \t\n data \t\n ?></e>')
+        pi = dom.documentElement.firstChild
+        self.assertEqual(pi.target, "mypi")
+        # Whitespace after the target is dropped; trailing whitespace stays.
+        self.assertEqual(pi.data, "data \t\n ")
+        self.assertEqual(pi.nodeName, "mypi")
+        self.assertEqual(pi.nodeType, Node.PROCESSING_INSTRUCTION_NODE)
+        self.assertIsNone(pi.attributes)
+        self.assertFalse(pi.hasChildNodes())
+        self.assertEqual(len(pi.childNodes), 0)
+        self.assertIsNone(pi.firstChild)
+        self.assertIsNone(pi.lastChild)
+        self.assertIsNone(pi.localName)
+        self.assertEqual(pi.namespaceURI, xml.dom.EMPTY_NAMESPACE)
+
+    def testProcessingInstructionRepr(self):
+        """repr() and str() of a ProcessingInstruction node agree."""
+        dom = parseString('<e><?mypi \t\n data \t\n ?></e>')
+        pi = dom.documentElement.firstChild
+        self.assertEqual(pi.nodeType, Node.PROCESSING_INSTRUCTION_NODE)
+        # Compare the node itself; its integer nodeType would match trivially.
+        self.assertEqual(str(pi), repr(pi))
+
+    def testTextRepr(self):
+        """repr()/str() of a Text node show its class name and quoted data."""
+        dom = Document()
+        self.addCleanup(dom.unlink)
+        elem = dom.createElement("elem")
+        elem.appendChild(dom.createTextNode("foo"))
+        el = elem.firstChild
+        self.assertEqual(str(el), repr(el))
+        self.assertEqual('<DOM Text node "\'foo\'">', str(el))
+
+ # Empty placeholders: these two methods contain no assertions and always
+ # pass.
+ def testWriteText(self): pass
+
+ def testDocumentElement(self): pass
+
+    def testTooManyDocumentElements(self):
+        """A Document refuses a second top-level element."""
+        doc = parseString("<doc/>")
+        elem = doc.createElement("extra")
+        # Should raise an exception when adding an extra document element.
+        with self.assertRaises(xml.dom.HierarchyRequestErr):
+            doc.appendChild(elem)
+        elem.unlink()
+        doc.unlink()
+
+ # Empty placeholders: none of the methods from testCreateElementNS through
+ # testAttrListKeysNS contains an assertion, so each always passes.
+ def testCreateElementNS(self): pass
+
+ def testCreateAttributeNS(self): pass
+
+ def testParse(self): pass
+
+ def testParseString(self): pass
+
+ def testComment(self): pass
+
+ def testAttrListItem(self): pass
+
+ def testAttrListItems(self): pass
+
+ def testAttrListItemNS(self): pass
+
+ def testAttrListKeys(self): pass
+
+ def testAttrListKeysNS(self): pass
+
+    def testRemoveNamedItem(self):
+        """removeNamedItem() returns the Attr and fails on a second call."""
+        doc = parseString("<doc a=''/>")
+        e = doc.documentElement
+        attrs = e.attributes
+        a1 = e.getAttributeNode("a")
+        a2 = attrs.removeNamedItem("a")
+        self.assertTrue(a1.isSameNode(a2))
+        with self.assertRaises(xml.dom.NotFoundErr):
+            attrs.removeNamedItem("a")
+
+    def testRemoveNamedItemNS(self):
+        """removeNamedItemNS() returns the Attr and fails on a second call."""
+        doc = parseString("<doc xmlns:a='http://xml.python.org/' a:b=''/>")
+        e = doc.documentElement
+        attrs = e.attributes
+        a1 = e.getAttributeNodeNS("http://xml.python.org/", "b")
+        a2 = attrs.removeNamedItemNS("http://xml.python.org/", "b")
+        self.assertTrue(a1.isSameNode(a2))
+        with self.assertRaises(xml.dom.NotFoundErr):
+            attrs.removeNamedItemNS("http://xml.python.org/", "b")
+
+ # Empty placeholders: none of the methods from testAttrListValues through
+ # testFirstChild contains an assertion, so each always passes.
+ def testAttrListValues(self): pass
+
+ def testAttrListLength(self): pass
+
+ def testAttrList__getitem__(self): pass
+
+ def testAttrList__setitem__(self): pass
+
+ def testSetAttrValueandNodeValue(self): pass
+
+ def testParseElement(self): pass
+
+ def testParseAttributes(self): pass
+
+ def testParseElementNamespaces(self): pass
+
+ def testParseAttributeNamespaces(self): pass
+
+ def testParseProcessingInstructions(self): pass
+
+ def testChildNodes(self): pass
+
+ def testFirstChild(self): pass
+
+    def testHasChildNodes(self):
+        """hasChildNodes() tells a populated element from an empty one."""
+        dom = parseString("<doc><foo/></doc>")
+        doc = dom.documentElement
+        self.assertTrue(doc.hasChildNodes())
+        dom2 = parseString("<doc/>")
+        doc2 = dom2.documentElement
+        self.assertFalse(doc2.hasChildNodes())
+
+    def _testCloneElementCopiesAttributes(self, e1, e2, test):
+        """Assert that *e2* carries distinct copies of the attributes of *e1*.
+
+        *test* is accepted for the callers' benefit and is not used here.
+        """
+        source_attrs = e1.attributes
+        cloned_attrs = e2.attributes
+        source_names = sorted(source_attrs.keys())
+        cloned_names = sorted(cloned_attrs.keys())
+        self.assertEqual(source_names, cloned_names)
+        # Walk both maps by position, one step per attribute name.
+        for position, _ in enumerate(source_names):
+            original = source_attrs.item(position)
+            copied = cloned_attrs.item(position)
+            self.assertIsNot(original, copied)
+            self.assertEqual(original.value, copied.value)
+            self.assertEqual(original.nodeValue, copied.nodeValue)
+            self.assertEqual(original.namespaceURI, copied.namespaceURI)
+            self.assertEqual(original.localName, copied.localName)
+            self.assertIs(copied.ownerElement, e2)
+
+    def _setupCloneElement(self, deep):
+        """Clone the root of a small document and then alter the original.
+
+        Returns ``(dom, clone)``.  The clone is taken with
+        ``cloneNode(deep)`` before the original root is renamed and given
+        new attribute values.
+        """
+        dom = parseString("<doc attr='value'><foo/></doc>")
+        root = dom.documentElement
+        clone = root.cloneNode(deep)
+        self._testCloneElementCopiesAttributes(
+            root, clone, "testCloneElement" + ("Deep" if deep else "Shallow"))
+        # mutilate the original so shared data is detected
+        root.tagName = root.nodeName = "MODIFIED"
+        root.setAttribute("attr", "NEW VALUE")
+        root.setAttribute("added", "VALUE")
+        return dom, clone
+
+    def testCloneElementShallow(self):
+        """A shallow element clone keeps attributes but no children."""
+        dom, clone = self._setupCloneElement(0)
+        self.assertEqual(len(clone.childNodes), 0)
+        self.assertEqual(clone.childNodes.length, 0)
+        self.assertIsNone(clone.parentNode)
+        self.assertEqual(clone.toxml(), '<doc attr="value"/>')
+
+        dom.unlink()
+
+    def testCloneElementDeep(self):
+        """A deep element clone keeps both attributes and children."""
+        dom, clone = self._setupCloneElement(1)
+        self.assertEqual(len(clone.childNodes), 1)
+        self.assertEqual(clone.childNodes.length, 1)
+        self.assertIsNone(clone.parentNode)
+        # assertEqual, not assertTrue: with assertTrue the second argument
+        # is only a failure message and the output is never compared.
+        self.assertEqual(clone.toxml(), '<doc attr="value"><foo/></doc>')
+        dom.unlink()
+
+    def testCloneDocumentShallow(self):
+        """Shallow-cloning a Document yields None."""
+        doc = parseString("<?xml version='1.0'?>\n"
+                    "<!-- comment -->"
+                    "<!DOCTYPE doc [\n"
+                    "<!NOTATION notation SYSTEM 'http://xml.python.org/'>\n"
+                    "]>\n"
+                    "<doc attr='value'/>")
+        doc2 = doc.cloneNode(0)
+        self.assertIsNone(doc2,
+                          "testCloneDocumentShallow:"
+                          " shallow cloning of documents makes no sense!")
+
+    def testCloneDocumentDeep(self):
+        """A deep Document clone is a distinct copy that owns its nodes."""
+        doc = parseString("<?xml version='1.0'?>\n"
+                    "<!-- comment -->"
+                    "<!DOCTYPE doc [\n"
+                    "<!NOTATION notation SYSTEM 'http://xml.python.org/'>\n"
+                    "]>\n"
+                    "<doc attr='value'/>")
+        doc2 = doc.cloneNode(1)
+        self.assertFalse((doc.isSameNode(doc2) or doc2.isSameNode(doc)),
+            "testCloneDocumentDeep: document objects not distinct")
+        self.assertEqual(len(doc.childNodes), len(doc2.childNodes),
+            "testCloneDocumentDeep: wrong number of Document children")
+        self.assertEqual(doc2.documentElement.nodeType, Node.ELEMENT_NODE,
+            "testCloneDocumentDeep: documentElement not an ELEMENT_NODE")
+        self.assertTrue(doc2.documentElement.ownerDocument.isSameNode(doc2),
+            "testCloneDocumentDeep: documentElement owner is not new document")
+        self.assertFalse(doc.documentElement.isSameNode(doc2.documentElement),
+            "testCloneDocumentDeep: documentElement should not be shared")
+        if doc.doctype is not None:
+            # check the doctype iff the original DOM maintained it
+            self.assertEqual(doc2.doctype.nodeType, Node.DOCUMENT_TYPE_NODE,
+                "testCloneDocumentDeep: doctype not a DOCUMENT_TYPE_NODE")
+            self.assertTrue(doc2.doctype.ownerDocument.isSameNode(doc2))
+            self.assertFalse(doc.doctype.isSameNode(doc2.doctype))
+
+    def testCloneDocumentTypeDeepOk(self):
+        """Deep-cloning a detached doctype copies entities and notations."""
+        source = create_nonempty_doctype()
+        clone = source.cloneNode(1)
+        self.confirm(clone is not None
+                and clone.nodeName == source.nodeName
+                and clone.name == source.name
+                and clone.publicId == source.publicId
+                and clone.systemId == source.systemId
+                and len(clone.entities) == len(source.entities)
+                and clone.entities.item(len(clone.entities)) is None
+                and len(clone.notations) == len(source.notations)
+                and clone.notations.item(len(clone.notations)) is None
+                and len(clone.childNodes) == 0)
+        # Each cloned entity is a different node carrying equal fields.
+        for index in range(len(source.entities)):
+            se = source.entities.item(index)
+            ce = clone.entities.item(index)
+            self.confirm((not se.isSameNode(ce))
+                    and (not ce.isSameNode(se))
+                    and ce.nodeName == se.nodeName
+                    and ce.notationName == se.notationName
+                    and ce.publicId == se.publicId
+                    and ce.systemId == se.systemId
+                    and ce.encoding == se.encoding
+                    and ce.actualEncoding == se.actualEncoding
+                    and ce.version == se.version)
+        # The same holds for notations.
+        for index in range(len(source.notations)):
+            sn = source.notations.item(index)
+            cn = clone.notations.item(index)
+            self.confirm((not sn.isSameNode(cn))
+                    and (not cn.isSameNode(sn))
+                    and cn.nodeName == sn.nodeName
+                    and cn.publicId == sn.publicId
+                    and cn.systemId == sn.systemId)
+
+    def testCloneDocumentTypeDeepNotOk(self):
+        """Deep-cloning a doctype taken from a document yields None."""
+        owner = create_doc_with_doctype()
+        self.assertIsNone(owner.doctype.cloneNode(1))
+
+    def testCloneDocumentTypeShallowOk(self):
+        """Shallow-cloning a detached doctype drops entities and notations."""
+        source = create_nonempty_doctype()
+        clone = source.cloneNode(0)
+        self.confirm(clone is not None
+                and clone.nodeName == source.nodeName
+                and clone.name == source.name
+                and clone.publicId == source.publicId
+                and clone.systemId == source.systemId
+                and len(clone.entities) == 0
+                and clone.entities.item(0) is None
+                and len(clone.notations) == 0
+                and clone.notations.item(0) is None
+                and len(clone.childNodes) == 0)
+
+    def testCloneDocumentTypeShallowNotOk(self):
+        """Shallow-cloning a doctype taken from a document yields None."""
+        owner = create_doc_with_doctype()
+        self.assertIsNone(owner.doctype.cloneNode(0))
+
+    def check_import_document(self, deep, testName):
+        """Assert that importNode() rejects a whole Document.
+
+        *deep* is forwarded to importNode(); *testName* is not used.
+        """
+        doc1 = parseString("<doc/>")
+        doc2 = parseString("<doc/>")
+        with self.assertRaises(xml.dom.NotSupportedErr):
+            doc1.importNode(doc2, deep)
+
+    def testImportDocumentShallow(self):
+        """Run the Document import check with a shallow import."""
+        self.check_import_document(deep=0,
+                                   testName="testImportDocumentShallow")
+
+    def testImportDocumentDeep(self):
+        """Run the Document import check with a deep import."""
+        self.check_import_document(deep=1, testName="testImportDocumentDeep")
+
+    def testImportDocumentTypeShallow(self):
+        """importNode() rejects a DocumentType on a shallow import."""
+        source_doc = create_doc_with_doctype()
+        destination = create_doc_without_doctype()
+        with self.assertRaises(xml.dom.NotSupportedErr):
+            destination.importNode(source_doc.doctype, 0)
+
+    def testImportDocumentTypeDeep(self):
+        """importNode() rejects a DocumentType on a deep import."""
+        source_doc = create_doc_with_doctype()
+        destination = create_doc_without_doctype()
+        with self.assertRaises(xml.dom.NotSupportedErr):
+            destination.importNode(source_doc.doctype, 1)
+
+    # Testing attribute clones uses a helper, and should always be deep,
+    # even if the argument to cloneNode is false.
+    def check_clone_attribute(self, deep, testName):
+        """Clone the ``attr`` attribute and check the clone's ownership.
+
+        *deep* is forwarded to cloneNode(); *testName* prefixes the
+        failure messages.
+        """
+        doc = parseString("<doc attr='value'/>")
+        attr = doc.documentElement.getAttributeNode("attr")
+        self.assertIsNotNone(attr)
+        clone = attr.cloneNode(deep)
+        self.assertFalse(clone.isSameNode(attr))
+        self.assertFalse(attr.isSameNode(clone))
+        self.assertIsNone(clone.ownerElement,
+                          testName + ": ownerElement should be None")
+        self.confirm(clone.ownerDocument.isSameNode(attr.ownerDocument),
+                     testName + ": ownerDocument does not match")
+        self.confirm(clone.specified,
+                     testName + ": cloned attribute must have specified == True")
+
+    def testCloneAttributeShallow(self):
+        """Run the attribute clone check with a false *deep* argument."""
+        self.check_clone_attribute(deep=0,
+                                   testName="testCloneAttributeShallow")
+
+    def testCloneAttributeDeep(self):
+        """Run the attribute clone check with a true *deep* argument."""
+        self.check_clone_attribute(deep=1, testName="testCloneAttributeDeep")
+
+    def check_clone_pi(self, deep, testName):
+        """Clone a processing instruction and compare target and data.
+
+        *deep* is forwarded to cloneNode(); *testName* is not used.
+        """
+        doc = parseString("<?target data?><doc/>")
+        pi = doc.firstChild
+        self.assertEqual(pi.nodeType, Node.PROCESSING_INSTRUCTION_NODE)
+        clone = pi.cloneNode(deep)
+        self.confirm(clone.target == pi.target
+                and clone.data == pi.data)
+
+    def testClonePIShallow(self):
+        """Run the processing-instruction clone check, shallow."""
+        self.check_clone_pi(deep=0, testName="testClonePIShallow")
+
+    def testClonePIDeep(self):
+        """Run the processing-instruction clone check, deep."""
+        self.check_clone_pi(deep=1, testName="testClonePIDeep")
+
+    def check_clone_node_entity(self, clone_document):
+        """Check the user data handler call made when an entity is cloned.
+
+        With *clone_document* true the whole Document is cloned and the
+        handler is expected to see NODE_IMPORTED; otherwise only the
+        DocumentType is cloned and NODE_CLONED is expected.
+        """
+        # bpo-35052: Test user data handler in cloneNode() on a document with
+        # an entity
+        document = xml.dom.minidom.parseString("""
+            <?xml version="1.0" ?>
+            <!DOCTYPE doc
+              [<!ENTITY smile "☺">]
+            >
+            <doc>Don't let entities make you frown &smile;</doc>
+        """.strip())
+
+        class Handler:
+            # Records the arguments of the most recent handle() call.
+            def handle(self, operation, key, data, src, dst):
+                self.operation = operation
+                self.key = key
+                self.data = data
+                self.src = src
+                self.dst = dst
+
+        handler = Handler()
+        doctype = document.doctype
+        entity = doctype.entities['smile']
+        entity.setUserData("key", "data", handler)
+
+        if clone_document:
+            # clone Document
+            clone = document.cloneNode(deep=True)
+
+            self.assertEqual(clone.documentElement.firstChild.wholeText,
+                             "Don't let entities make you frown ☺")
+            operation = xml.dom.UserDataHandler.NODE_IMPORTED
+            dst = clone.doctype.entities['smile']
+        else:
+            # clone DocumentType
+            # The owner document is temporarily removed for the clone call.
+            with support.swap_attr(doctype, 'ownerDocument', None):
+                clone = doctype.cloneNode(deep=True)
+
+            operation = xml.dom.UserDataHandler.NODE_CLONED
+            dst = clone.entities['smile']
+
+        self.assertEqual(handler.operation, operation)
+        self.assertEqual(handler.key, "key")
+        self.assertEqual(handler.data, "data")
+        self.assertIs(handler.src, entity)
+        self.assertIs(handler.dst, dst)
+
+    def testCloneNodeEntity(self):
+        """Run the entity clone check for a doctype, then for a document."""
+        for clone_document in (False, True):
+            self.check_clone_node_entity(clone_document)
+
+ def testNormalize(self):
+ # Two scenarios: adjacent text nodes are merged by normalize(), and a
+ # lone empty text node is removed by it.
+ doc = parseString("")
+ root = doc.documentElement
+ root.appendChild(doc.createTextNode("first"))
+ root.appendChild(doc.createTextNode("second"))
+ self.confirm(len(root.childNodes) == 2
+ and root.childNodes.length == 2,
+ "testNormalize -- preparation")
+ doc.normalize()
+ self.confirm(len(root.childNodes) == 1
+ and root.childNodes.length == 1
+ and root.firstChild is root.lastChild
+ and root.firstChild.data == "firstsecond"
+ , "testNormalize -- result")
+ doc.unlink()
+
+ # Second scenario: a single empty text node must disappear entirely.
+ doc = parseString("")
+ root = doc.documentElement
+ root.appendChild(doc.createTextNode(""))
+ doc.normalize()
+ self.confirm(len(root.childNodes) == 0
+ and root.childNodes.length == 0,
+ "testNormalize -- single empty node removed")
+ doc.unlink()
+
+ def testNormalizeCombineAndNextSibling(self):
+     # Two adjacent text nodes followed by an element: normalize() must
+     # merge the text nodes into one and leave the sibling links between
+     # the merged text node and the element consistent.
+     doc = parseString("")
+     root = doc.documentElement
+     root.appendChild(doc.createTextNode("first"))
+     root.appendChild(doc.createTextNode("second"))
+     root.appendChild(doc.createElement("i"))
+     self.confirm(len(root.childNodes) == 3
+                  and root.childNodes.length == 3,
+                  "testNormalizeCombineAndNextSibling -- preparation")
+     doc.normalize()
+     # The message now carries this test's real name (it used to read
+     # "testNormalizeCombinedAndNextSibling").
+     self.confirm(len(root.childNodes) == 2
+                  and root.childNodes.length == 2
+                  and root.firstChild.data == "firstsecond"
+                  and root.firstChild is not root.lastChild
+                  and root.firstChild.nextSibling is root.lastChild
+                  and root.firstChild.previousSibling is None
+                  and root.lastChild.previousSibling is root.firstChild
+                  and root.lastChild.nextSibling is None
+                  , "testNormalizeCombineAndNextSibling -- result")
+     doc.unlink()
+
+ def testNormalizeDeleteWithPrevSibling(self):
+ # An empty text node that follows a non-empty one must be dropped by
+ # normalize(), leaving a single child with no sibling links.
+ doc = parseString("")
+ root = doc.documentElement
+ root.appendChild(doc.createTextNode("first"))
+ root.appendChild(doc.createTextNode(""))
+ self.confirm(len(root.childNodes) == 2
+ and root.childNodes.length == 2,
+ "testNormalizeDeleteWithPrevSibling -- preparation")
+ doc.normalize()
+ self.confirm(len(root.childNodes) == 1
+ and root.childNodes.length == 1
+ and root.firstChild.data == "first"
+ and root.firstChild is root.lastChild
+ and root.firstChild.nextSibling is None
+ and root.firstChild.previousSibling is None
+ , "testNormalizeDeleteWithPrevSibling -- result")
+ doc.unlink()
+
+ def testNormalizeDeleteWithNextSibling(self):
+ # An empty text node that precedes a non-empty one must be dropped by
+ # normalize(), leaving a single child with no sibling links.
+ doc = parseString("")
+ root = doc.documentElement
+ root.appendChild(doc.createTextNode(""))
+ root.appendChild(doc.createTextNode("second"))
+ self.confirm(len(root.childNodes) == 2
+ and root.childNodes.length == 2,
+ "testNormalizeDeleteWithNextSibling -- preparation")
+ doc.normalize()
+ self.confirm(len(root.childNodes) == 1
+ and root.childNodes.length == 1
+ and root.firstChild.data == "second"
+ and root.firstChild is root.lastChild
+ and root.firstChild.nextSibling is None
+ and root.firstChild.previousSibling is None
+ , "testNormalizeDeleteWithNextSibling -- result")
+ doc.unlink()
+
+ def testNormalizeDeleteWithTwoNonTextSiblings(self):
+     # An empty text node sandwiched between two elements: normalize() must
+     # drop it and link the two elements directly to each other.
+     doc = parseString("")
+     root = doc.documentElement
+     root.appendChild(doc.createElement("i"))
+     root.appendChild(doc.createTextNode(""))
+     root.appendChild(doc.createElement("i"))
+     # Messages carry this test's real name (they used to read
+     # "testNormalizeDeleteWithTwoSiblings").
+     self.confirm(len(root.childNodes) == 3
+                  and root.childNodes.length == 3,
+                  "testNormalizeDeleteWithTwoNonTextSiblings -- preparation")
+     doc.normalize()
+     self.confirm(len(root.childNodes) == 2
+                  and root.childNodes.length == 2
+                  and root.firstChild is not root.lastChild
+                  and root.firstChild.nextSibling is root.lastChild
+                  and root.firstChild.previousSibling is None
+                  and root.lastChild.previousSibling is root.firstChild
+                  and root.lastChild.nextSibling is None
+                  , "testNormalizeDeleteWithTwoNonTextSiblings -- result")
+     doc.unlink()
+
+ def testNormalizeDeleteAndCombine(self):
+ # Empty text nodes interleaved with non-empty ones: normalize() must
+ # drop the empty nodes and merge the remaining text into one node.
+ doc = parseString("")
+ root = doc.documentElement
+ root.appendChild(doc.createTextNode(""))
+ root.appendChild(doc.createTextNode("second"))
+ root.appendChild(doc.createTextNode(""))
+ root.appendChild(doc.createTextNode("fourth"))
+ root.appendChild(doc.createTextNode(""))
+ self.confirm(len(root.childNodes) == 5
+ and root.childNodes.length == 5,
+ "testNormalizeDeleteAndCombine -- preparation")
+ doc.normalize()
+ self.confirm(len(root.childNodes) == 1
+ and root.childNodes.length == 1
+ and root.firstChild is root.lastChild
+ and root.firstChild.data == "secondfourth"
+ and root.firstChild.previousSibling is None
+ and root.firstChild.nextSibling is None
+ , "testNormalizeDeleteAndCombine -- result")
+ doc.unlink()
+
+ def testNormalizeRecursion(self):
+     # normalize() called on the document must recurse into every element:
+     # text nodes are appended at several depths below, and afterwards each
+     # subtree is checked for merged text, dropped empty nodes and intact
+     # sibling links.  The bare "#..." comments inside the literal mark
+     # where those extra text nodes get appended.
+     doc = parseString(""
+                       ""
+                       ""
+                       "t"
+                       #
+                       #x
+                       ""
+                       ""
+                       ""
+                       "t2"
+                       #x2
+                       ""
+                       "t3"
+                       #x3
+                       ""
+                       #
+                       "")
+     root = doc.documentElement
+     root.childNodes[0].appendChild(doc.createTextNode(""))
+     root.childNodes[0].appendChild(doc.createTextNode("x"))
+     root.childNodes[1].childNodes[0].appendChild(doc.createTextNode("x2"))
+     root.childNodes[1].appendChild(doc.createTextNode("x3"))
+     root.appendChild(doc.createTextNode(""))
+     # Messages carry this test's real name (they used to read
+     # "testNormalize2").
+     self.confirm(len(root.childNodes) == 3
+                  and root.childNodes.length == 3
+                  and len(root.childNodes[0].childNodes) == 4
+                  and root.childNodes[0].childNodes.length == 4
+                  and len(root.childNodes[1].childNodes) == 3
+                  and root.childNodes[1].childNodes.length == 3
+                  and len(root.childNodes[1].childNodes[0].childNodes) == 2
+                  and root.childNodes[1].childNodes[0].childNodes.length == 2
+                  , "testNormalizeRecursion -- preparation")
+     doc.normalize()
+     self.confirm(len(root.childNodes) == 2
+                  and root.childNodes.length == 2
+                  and len(root.childNodes[0].childNodes) == 2
+                  and root.childNodes[0].childNodes.length == 2
+                  and len(root.childNodes[1].childNodes) == 2
+                  and root.childNodes[1].childNodes.length == 2
+                  and len(root.childNodes[1].childNodes[0].childNodes) == 1
+                  and root.childNodes[1].childNodes[0].childNodes.length == 1
+                  , "testNormalizeRecursion -- childNodes lengths")
+     self.confirm(root.childNodes[0].childNodes[1].data == "tx"
+                  and root.childNodes[1].childNodes[0].childNodes[0].data == "t2x2"
+                  and root.childNodes[1].childNodes[1].data == "t3x3"
+                  , "testNormalizeRecursion -- joined text fields")
+     self.confirm(root.childNodes[0].childNodes[1].nextSibling is None
+                  and root.childNodes[0].childNodes[1].previousSibling
+                          is root.childNodes[0].childNodes[0]
+                  and root.childNodes[0].childNodes[0].previousSibling is None
+                  and root.childNodes[0].childNodes[0].nextSibling
+                          is root.childNodes[0].childNodes[1]
+                  and root.childNodes[1].childNodes[1].nextSibling is None
+                  and root.childNodes[1].childNodes[1].previousSibling
+                          is root.childNodes[1].childNodes[0]
+                  and root.childNodes[1].childNodes[0].previousSibling is None
+                  and root.childNodes[1].childNodes[0].nextSibling
+                          is root.childNodes[1].childNodes[1]
+                  , "testNormalizeRecursion -- sibling pointers")
+     doc.unlink()
+
+
+ def testBug0777884(self):
+ # normalize() is called on a text node itself rather than on an element
+ # or document; the test passes if that call raises nothing.
+ doc = parseString("text")
+ text = doc.documentElement.childNodes[0]
+ self.assertEqual(text.nodeType, Node.TEXT_NODE)
+ # Should run quietly, doing nothing.
+ text.normalize()
+ doc.unlink()
+
+ def testBug1433694(self):
+ # The second child's value is emptied and the element normalized;
+ # afterwards the last remaining child must not keep a stale
+ # nextSibling reference.
+ doc = parseString("t")
+ node = doc.documentElement
+ node.childNodes[1].nodeValue = ""
+ node.normalize()
+ self.assertIsNone(node.childNodes[-1].nextSibling,
+ "Final child's .nextSibling should be None")
+
+ def testSiblings(self):
+ # nextSibling/previousSibling links among the root's three children.
+ doc = parseString("text?")
+ root = doc.documentElement
+ # The unpacking also asserts that the root has exactly three children;
+ # the names suggest PI, text and element nodes, in that order.
+ (pi, text, elm) = root.childNodes
+
+ self.confirm(pi.nextSibling is text and
+ pi.previousSibling is None and
+ text.nextSibling is elm and
+ text.previousSibling is pi and
+ elm.nextSibling is None and
+ elm.previousSibling is text, "testSiblings")
+
+ doc.unlink()
+
+ def testParents(self):
+ # parentNode links from the document down through three levels of
+ # nested elements.
+ doc = parseString(
+ "")
+ root = doc.documentElement
+ elm1 = root.childNodes[0]
+ # elm1 must have exactly two children for this unpacking to succeed.
+ (elm2a, elm2b) = elm1.childNodes
+ elm3 = elm2b.childNodes[0]
+
+ self.confirm(root.parentNode is doc and
+ elm1.parentNode is root and
+ elm2a.parentNode is elm1 and
+ elm2b.parentNode is elm1 and
+ elm3.parentNode is elm2b, "testParents")
+ doc.unlink()
+
+ def testNodeListItem(self):
+ # NodeList.item(i) must return the same object as indexing for a valid
+ # index and None for an index past the end.
+ doc = parseString("")
+ children = doc.childNodes
+ docelem = children[0]
+ self.confirm(children[0] is children.item(0)
+ and children.item(1) is None
+ and docelem.childNodes.item(0) is docelem.childNodes[0]
+ and docelem.childNodes.item(1) is docelem.childNodes[1]
+ and docelem.childNodes.item(0).childNodes.item(0) is None,
+ "test NodeList.item()")
+ doc.unlink()
+
+ def testEncodings(self):
+ # toxml() with no encoding returns str; with an encoding argument the
+ # expected values below are bytes in that encoding.
+ doc = parseString('€')
+ self.assertEqual(doc.toxml(),
+ '\u20ac')
+ self.assertEqual(doc.toxml('utf-8'),
+ b'\xe2\x82\xac')
+ self.assertEqual(doc.toxml('iso-8859-15'),
+ b'\xa4')
+ self.assertEqual(doc.toxml('us-ascii'),
+ b'€')
+ self.assertEqual(doc.toxml('utf-16'),
+ ''
+ '\u20ac'.encode('utf-16'))
+
+ # Verify that character decoding errors raise exceptions instead
+ # of crashing
+ with self.assertRaises((UnicodeDecodeError, ExpatError)):
+ parseString(
+ b'Comment \xe7a va ? Tr\xe8s bien ?'
+ )
+
+ doc.unlink()
+
+ def testStandalone(self):
+ # toxml() with the standalone argument omitted, None, True and False,
+ # and finally passed positionally after an encoding (bytes result).
+ doc = parseString('€')
+ self.assertEqual(doc.toxml(),
+ '\u20ac')
+ self.assertEqual(doc.toxml(standalone=None),
+ '\u20ac')
+ self.assertEqual(doc.toxml(standalone=True),
+ '\u20ac')
+ self.assertEqual(doc.toxml(standalone=False),
+ '\u20ac')
+ self.assertEqual(doc.toxml('utf-8', True),
+ b''
+ b'\xe2\x82\xac')
+
+ doc.unlink()
+
+ class UserDataHandler:
+     # Handler used by testUserData: on invocation it stores the data,
+     # incremented by one, on the destination node, clears the key on the
+     # source node, and flags that it ran.
+     called = 0
+
+     def handle(self, operation, key, data, src, dst):
+         bumped = data + 1
+         dst.setUserData(key, bumped, self)
+         src.setUserData(key, None, None)
+         self.called = 1
+
+ def testUserData(self):
+     # Round trip through setUserData()/getUserData(), then check that a
+     # registered handler is invoked when the node is cloned.
+     dom = Document()
+     node = dom.createElement('e')
+     self.assertIsNone(node.getUserData("foo"))
+     node.setUserData("foo", None, None)
+     self.assertIsNone(node.getUserData("foo"))
+     initial = (("foo", 12), ("bar", 13))
+     for key, value in initial:
+         # the value is also passed in the handler slot, as before
+         node.setUserData(key, value, value)
+     for key, value in initial:
+         self.assertEqual(node.getUserData(key), value)
+     # Clearing one key must leave the other untouched.
+     node.setUserData("foo", None, None)
+     self.assertIsNone(node.getUserData("foo"))
+     self.assertEqual(node.getUserData("bar"), 13)
+
+     handler = self.UserDataHandler()
+     node.setUserData("bar", 12, handler)
+     copy_ = node.cloneNode(1)
+     moved = (handler.called
+              and node.getUserData("bar") is None
+              and copy_.getUserData("bar") == 13)
+     self.confirm(moved)
+     for obj in (node, copy_, dom):
+         obj.unlink()
+
+ def checkRenameNodeSharedConstraints(self, doc, node):
+ # Checks shared by the renameNode() tests.  `doc` is the document the
+ # rename is attempted on first; `node` is the node to rename.
+ # Make sure illegal NS usage is detected:
+ self.assertRaises(xml.dom.NamespaceErr, doc.renameNode, node,
+ "http://xml.python.org/ns", "xmlns:foo")
+ # A second, freshly parsed document must refuse to rename `node`.
+ doc2 = parseString("")
+ self.assertRaises(xml.dom.WrongDocumentErr, doc2.renameNode, node,
+ xml.dom.EMPTY_NAMESPACE, "foo")
+
+ def testRenameAttribute(self):
+ # Renames one attribute through a sequence of names (no namespace ->
+ # namespace without prefix -> namespace with prefix -> no namespace).
+ # After each step the attribute's naming properties are checked, along
+ # with lookups on the element and attribute map: the new name must find
+ # the same node and every earlier name must find nothing.
+ doc = parseString("")
+ elem = doc.documentElement
+ attrmap = elem.attributes
+ attr = elem.attributes['a']
+
+ # Simple renaming
+ attr = doc.renameNode(attr, xml.dom.EMPTY_NAMESPACE, "b")
+ self.confirm(attr.name == "b"
+ and attr.nodeName == "b"
+ and attr.localName is None
+ and attr.namespaceURI == xml.dom.EMPTY_NAMESPACE
+ and attr.prefix is None
+ and attr.value == "v"
+ and elem.getAttributeNode("a") is None
+ and elem.getAttributeNode("b").isSameNode(attr)
+ and attrmap["b"].isSameNode(attr)
+ and attr.ownerDocument.isSameNode(doc)
+ and attr.ownerElement.isSameNode(elem))
+
+ # Rename to have a namespace, no prefix
+ attr = doc.renameNode(attr, "http://xml.python.org/ns", "c")
+ self.confirm(attr.name == "c"
+ and attr.nodeName == "c"
+ and attr.localName == "c"
+ and attr.namespaceURI == "http://xml.python.org/ns"
+ and attr.prefix is None
+ and attr.value == "v"
+ and elem.getAttributeNode("a") is None
+ and elem.getAttributeNode("b") is None
+ and elem.getAttributeNode("c").isSameNode(attr)
+ and elem.getAttributeNodeNS(
+ "http://xml.python.org/ns", "c").isSameNode(attr)
+ and attrmap["c"].isSameNode(attr)
+ and attrmap[("http://xml.python.org/ns", "c")].isSameNode(attr))
+
+ # Rename to have a namespace, with prefix
+ attr = doc.renameNode(attr, "http://xml.python.org/ns2", "p:d")
+ self.confirm(attr.name == "p:d"
+ and attr.nodeName == "p:d"
+ and attr.localName == "d"
+ and attr.namespaceURI == "http://xml.python.org/ns2"
+ and attr.prefix == "p"
+ and attr.value == "v"
+ and elem.getAttributeNode("a") is None
+ and elem.getAttributeNode("b") is None
+ and elem.getAttributeNode("c") is None
+ and elem.getAttributeNodeNS(
+ "http://xml.python.org/ns", "c") is None
+ and elem.getAttributeNode("p:d").isSameNode(attr)
+ and elem.getAttributeNodeNS(
+ "http://xml.python.org/ns2", "d").isSameNode(attr)
+ and attrmap["p:d"].isSameNode(attr)
+ and attrmap[("http://xml.python.org/ns2", "d")].isSameNode(attr))
+
+ # Rename back to a simple non-NS node
+ attr = doc.renameNode(attr, xml.dom.EMPTY_NAMESPACE, "e")
+ self.confirm(attr.name == "e"
+ and attr.nodeName == "e"
+ and attr.localName is None
+ and attr.namespaceURI == xml.dom.EMPTY_NAMESPACE
+ and attr.prefix is None
+ and attr.value == "v"
+ and elem.getAttributeNode("a") is None
+ and elem.getAttributeNode("b") is None
+ and elem.getAttributeNode("c") is None
+ and elem.getAttributeNode("p:d") is None
+ and elem.getAttributeNodeNS(
+ "http://xml.python.org/ns", "c") is None
+ and elem.getAttributeNode("e").isSameNode(attr)
+ and attrmap["e"].isSameNode(attr))
+
+ # Renaming the attribute to "xmlns" under this namespace URI must be
+ # rejected with NamespaceErr.
+ self.assertRaises(xml.dom.NamespaceErr, doc.renameNode, attr,
+ "http://xml.python.org/ns", "xmlns")
+ self.checkRenameNodeSharedConstraints(doc, attr)
+ doc.unlink()
+
+ def testRenameElement(self):
+ # Renames the document element through a sequence of names (no
+ # namespace -> namespace without prefix -> namespace with prefix -> no
+ # namespace) and checks its naming properties and ownerDocument after
+ # each step.
+ doc = parseString("")
+ elem = doc.documentElement
+
+ # Simple renaming
+ elem = doc.renameNode(elem, xml.dom.EMPTY_NAMESPACE, "a")
+ self.confirm(elem.tagName == "a"
+ and elem.nodeName == "a"
+ and elem.localName is None
+ and elem.namespaceURI == xml.dom.EMPTY_NAMESPACE
+ and elem.prefix is None
+ and elem.ownerDocument.isSameNode(doc))
+
+ # Rename to have a namespace, no prefix
+ elem = doc.renameNode(elem, "http://xml.python.org/ns", "b")
+ self.confirm(elem.tagName == "b"
+ and elem.nodeName == "b"
+ and elem.localName == "b"
+ and elem.namespaceURI == "http://xml.python.org/ns"
+ and elem.prefix is None
+ and elem.ownerDocument.isSameNode(doc))
+
+ # Rename to have a namespace, with prefix
+ elem = doc.renameNode(elem, "http://xml.python.org/ns2", "p:c")
+ self.confirm(elem.tagName == "p:c"
+ and elem.nodeName == "p:c"
+ and elem.localName == "c"
+ and elem.namespaceURI == "http://xml.python.org/ns2"
+ and elem.prefix == "p"
+ and elem.ownerDocument.isSameNode(doc))
+
+ # Rename back to a simple non-NS node
+ elem = doc.renameNode(elem, xml.dom.EMPTY_NAMESPACE, "d")
+ self.confirm(elem.tagName == "d"
+ and elem.nodeName == "d"
+ and elem.localName is None
+ and elem.namespaceURI == xml.dom.EMPTY_NAMESPACE
+ and elem.prefix is None
+ and elem.ownerDocument.isSameNode(doc))
+
+ self.checkRenameNodeSharedConstraints(doc, elem)
+ doc.unlink()
+
+ def testRenameOther(self):
+     # We have to create a comment node explicitly since not all DOM
+     # builders used with minidom add comments to the DOM.
+     implementation = xml.dom.minidom.getDOMImplementation()
+     doc = implementation.createDocument(xml.dom.EMPTY_NAMESPACE, "e", None)
+     comment = doc.createComment("comment")
+     # renameNode() must refuse the comment node.
+     with self.assertRaises(xml.dom.NotSupportedErr):
+         doc.renameNode(comment, xml.dom.EMPTY_NAMESPACE, "foo")
+     doc.unlink()
+
+ def testWholeText(self):
+ # wholeText must concatenate the run of adjacent text and CDATA nodes
+ # around a text node and stop at any other node kind; the splitter is
+ # tried as a comment, an element and a processing instruction in turn.
+ doc = parseString("a")
+ elem = doc.documentElement
+ text = elem.childNodes[0]
+ self.assertEqual(text.nodeType, Node.TEXT_NODE)
+
+ self.checkWholeText(text, "a")
+ elem.appendChild(doc.createTextNode("b"))
+ self.checkWholeText(text, "ab")
+ elem.insertBefore(doc.createCDATASection("c"), text)
+ self.checkWholeText(text, "cab")
+
+ # make sure we don't cross other nodes
+ splitter = doc.createComment("comment")
+ elem.appendChild(splitter)
+ text2 = doc.createTextNode("d")
+ elem.appendChild(text2)
+ self.checkWholeText(text, "cab")
+ self.checkWholeText(text2, "d")
+
+ x = doc.createElement("x")
+ elem.replaceChild(x, splitter)
+ splitter = x
+ self.checkWholeText(text, "cab")
+ self.checkWholeText(text2, "d")
+
+ x = doc.createProcessingInstruction("y", "z")
+ elem.replaceChild(x, splitter)
+ splitter = x
+ self.checkWholeText(text, "cab")
+ self.checkWholeText(text2, "d")
+
+ # With the splitter gone the two runs become one.
+ elem.removeChild(splitter)
+ self.checkWholeText(text, "cabd")
+ self.checkWholeText(text2, "cabd")
+
+ def testPatch1094164(self):
+ # replaceChild(e, e) must leave e attached to its parent.
+ doc = parseString("")
+ elem = doc.documentElement
+ e = elem.firstChild
+ self.assertIs(e.parentNode, elem, "Before replaceChild()")
+ # Check that replacing a child with itself leaves the tree unchanged
+ elem.replaceChild(e, e)
+ self.assertIs(e.parentNode, elem, "After replaceChild()")
+
+ def testReplaceWholeText(self):
+ # Three scenarios, each on a freshly built tree: replace the run around
+ # the first text node, replace the run around the last text node, and
+ # replace a run with the empty string (which must return None).
+ def setup():
+ # Builds the tree and returns (doc, elem, text1, splitter, text2);
+ # a text node is inserted before the splitter and a CDATA section
+ # before text1.
+ doc = parseString("ad")
+ elem = doc.documentElement
+ text1 = elem.firstChild
+ text2 = elem.lastChild
+ splitter = text1.nextSibling
+ elem.insertBefore(doc.createTextNode("b"), splitter)
+ elem.insertBefore(doc.createCDATASection("c"), text1)
+ return doc, elem, text1, splitter, text2
+
+ doc, elem, text1, splitter, text2 = setup()
+ text = text1.replaceWholeText("new content")
+ self.checkWholeText(text, "new content")
+ self.checkWholeText(text2, "d")
+ self.assertEqual(len(elem.childNodes), 3)
+
+ doc, elem, text1, splitter, text2 = setup()
+ text = text2.replaceWholeText("new content")
+ self.checkWholeText(text, "new content")
+ self.checkWholeText(text1, "cab")
+ self.assertEqual(len(elem.childNodes), 5)
+
+ doc, elem, text1, splitter, text2 = setup()
+ text = text1.replaceWholeText("")
+ self.checkWholeText(text2, "d")
+ self.confirm(text is None
+ and len(elem.childNodes) == 2)
+
+ def testSchemaType(self):
+ # Checks schemaType on the document element and on each of the
+ # attributes listed in `names` below.
+ doc = parseString(
+ "\n"
+ " \n"
+ " \n"
+ "]>")
+ elem = doc.documentElement
+ # We don't want to rely on any specific loader at this point, so
+ # just make sure we can get to all the names, and that the
+ # DTD-based namespace is right. The names can vary by loader
+ # since each supports a different level of DTD information.
+ t = elem.schemaType
+ self.confirm(t.name is None
+ and t.namespace == xml.dom.EMPTY_NAMESPACE)
+ names = "id notid text enum ref refs ent ents nm nms".split()
+ for name in names:
+ a = elem.getAttributeNode(name)
+ t = a.schemaType
+ # Only the presence of .name is checked, not its value.
+ self.confirm(hasattr(t, "name")
+ and t.namespace == xml.dom.EMPTY_NAMESPACE)
+
+ def testSetIdAttribute(self):
+ # setIdAttribute() by attribute name: marks attributes as IDs one at a
+ # time and checks isId and getElementById() after each step, then after
+ # replacing one attribute node and after renaming the other.
+ doc = parseString("")
+ e = doc.documentElement
+ a1 = e.getAttributeNode("a1")
+ a2 = e.getAttributeNode("a2")
+ self.confirm(doc.getElementById("v") is None
+ and not a1.isId
+ and not a2.isId)
+ e.setIdAttribute("a1")
+ self.confirm(e.isSameNode(doc.getElementById("v"))
+ and a1.isId
+ and not a2.isId)
+ e.setIdAttribute("a2")
+ self.confirm(e.isSameNode(doc.getElementById("v"))
+ and e.isSameNode(doc.getElementById("w"))
+ and a1.isId
+ and a2.isId)
+ # replace the a1 node; the new node should *not* be an ID
+ a3 = doc.createAttribute("a1")
+ a3.value = "v"
+ e.setAttributeNode(a3)
+ self.confirm(doc.getElementById("v") is None
+ and e.isSameNode(doc.getElementById("w"))
+ and not a1.isId
+ and a2.isId
+ and not a3.isId)
+ # renaming an attribute should not affect its ID-ness:
+ doc.renameNode(a2, xml.dom.EMPTY_NAMESPACE, "an")
+ self.confirm(e.isSameNode(doc.getElementById("w"))
+ and a2.isId)
+
+ def testSetIdAttributeNS(self):
+ # Namespace-aware counterpart of the ID tests: attributes are marked as
+ # IDs with setIdAttributeNS(namespace, localName), and isId and
+ # getElementById() are checked after each step.
+ NS1 = "http://xml.python.org/ns1"
+ NS2 = "http://xml.python.org/ns2"
+ doc = parseString("")
+ e = doc.documentElement
+ a1 = e.getAttributeNodeNS(NS1, "a1")
+ a2 = e.getAttributeNodeNS(NS2, "a2")
+ self.confirm(doc.getElementById("v") is None
+ and not a1.isId
+ and not a2.isId)
+ e.setIdAttributeNS(NS1, "a1")
+ self.confirm(e.isSameNode(doc.getElementById("v"))
+ and a1.isId
+ and not a2.isId)
+ e.setIdAttributeNS(NS2, "a2")
+ self.confirm(e.isSameNode(doc.getElementById("v"))
+ and e.isSameNode(doc.getElementById("w"))
+ and a1.isId
+ and a2.isId)
+ # replace the a1 node; the new node should *not* be an ID
+ a3 = doc.createAttributeNS(NS1, "a1")
+ a3.value = "v"
+ e.setAttributeNode(a3)
+ # Individual assertions here, so a failure names the exact condition.
+ self.assertTrue(e.isSameNode(doc.getElementById("w")))
+ self.assertFalse(a1.isId)
+ self.assertTrue(a2.isId)
+ self.assertFalse(a3.isId)
+ self.assertIsNone(doc.getElementById("v"))
+ # renaming an attribute should not affect its ID-ness:
+ doc.renameNode(a2, xml.dom.EMPTY_NAMESPACE, "an")
+ self.confirm(e.isSameNode(doc.getElementById("w"))
+ and a2.isId)
+
+ def testSetIdAttributeNode(self):
+ # Same sequence of ID checks, but attributes are marked as IDs by
+ # passing the attribute node itself to setIdAttributeNode().
+ NS1 = "http://xml.python.org/ns1"
+ NS2 = "http://xml.python.org/ns2"
+ doc = parseString("")
+ e = doc.documentElement
+ a1 = e.getAttributeNodeNS(NS1, "a1")
+ a2 = e.getAttributeNodeNS(NS2, "a2")
+ self.confirm(doc.getElementById("v") is None
+ and not a1.isId
+ and not a2.isId)
+ e.setIdAttributeNode(a1)
+ self.confirm(e.isSameNode(doc.getElementById("v"))
+ and a1.isId
+ and not a2.isId)
+ e.setIdAttributeNode(a2)
+ self.confirm(e.isSameNode(doc.getElementById("v"))
+ and e.isSameNode(doc.getElementById("w"))
+ and a1.isId
+ and a2.isId)
+ # replace the a1 node; the new node should *not* be an ID
+ a3 = doc.createAttributeNS(NS1, "a1")
+ a3.value = "v"
+ e.setAttributeNode(a3)
+ # Individual assertions here, so a failure names the exact condition.
+ self.assertTrue(e.isSameNode(doc.getElementById("w")))
+ self.assertFalse(a1.isId)
+ self.assertTrue(a2.isId)
+ self.assertFalse(a3.isId)
+ self.assertIsNone(doc.getElementById("v"))
+ # renaming an attribute should not affect its ID-ness:
+ doc.renameNode(a2, xml.dom.EMPTY_NAMESPACE, "an")
+ self.confirm(e.isSameNode(doc.getElementById("w"))
+ and a2.isId)
+
+ def assert_recursive_equal(self, doc, doc2):
+     # Walk two DOM trees in lock step (explicit stack, no recursion) and
+     # assert that they have the same shape -- node types, names and child
+     # counts -- while sharing no node objects.  For DocumentType nodes the
+     # notations and entities are compared pairwise as well.
+     #
+     # doc, doc2 -- the two Document roots.  Every non-document node
+     #              reached from `doc` must report `doc` as its
+     #              ownerDocument, and likewise for `doc2`.
+     stack = [(doc, doc2)]
+     while stack:
+         n1, n2 = stack.pop()
+         self.assertEqual(n1.nodeType, n2.nodeType)
+         self.assertEqual(len(n1.childNodes), len(n2.childNodes))
+         self.assertEqual(n1.nodeName, n2.nodeName)
+         self.assertFalse(n1.isSameNode(n2))
+         self.assertFalse(n2.isSameNode(n1))
+         if n1.nodeType == Node.DOCUMENT_TYPE_NODE:
+             self.assertEqual(len(n1.entities), len(n2.entities))
+             self.assertEqual(len(n1.notations), len(n2.notations))
+             for i in range(len(n1.notations)):
+                 # XXX this loop body doesn't seem to be executed?
+                 no1 = n1.notations.item(i)
+                 # The second notation must come from n2.  Reading both
+                 # from n1 compared each notation with itself and queued
+                 # an identical pair, which the isSameNode() checks above
+                 # would reject as soon as a notation was present.
+                 no2 = n2.notations.item(i)
+                 self.assertEqual(no1.name, no2.name)
+                 self.assertEqual(no1.publicId, no2.publicId)
+                 self.assertEqual(no1.systemId, no2.systemId)
+                 stack.append((no1, no2))
+             for i in range(len(n1.entities)):
+                 e1 = n1.entities.item(i)
+                 e2 = n2.entities.item(i)
+                 self.assertEqual(e1.notationName, e2.notationName)
+                 self.assertEqual(e1.publicId, e2.publicId)
+                 self.assertEqual(e1.systemId, e2.systemId)
+                 stack.append((e1, e2))
+         if n1.nodeType != Node.DOCUMENT_NODE:
+             self.assertTrue(n1.ownerDocument.isSameNode(doc))
+             self.assertTrue(n2.ownerDocument.isSameNode(doc2))
+         # Child counts were asserted equal above, so zip() drops nothing.
+         for child1, child2 in zip(n1.childNodes, n2.childNodes):
+             stack.append((child1, child2))
+
+ def testPickledDocument(self):
+     # Round-trip the sample document through pickle with every protocol
+     # from 2 up to the highest available and compare each result with the
+     # original.
+     original = parseString(sample)
+     protocol = 2
+     while protocol <= pickle.HIGHEST_PROTOCOL:
+         restored = pickle.loads(pickle.dumps(original, protocol))
+         self.assert_recursive_equal(original, restored)
+         protocol += 1
+
+ def testDeepcopiedDocument(self):
+     # A deep copy of the sample document must match the original node for
+     # node without sharing any node object.
+     source = parseString(sample)
+     self.assert_recursive_equal(source, copy.deepcopy(source))
+
+ def testSerializeCommentNodeWithDoubleHyphen(self):
+     # Serialising a document that holds a comment containing "--" must
+     # raise ValueError.
+     doc = create_doc_without_doctype()
+     comment = doc.createComment("foo--bar")
+     doc.appendChild(comment)
+     with self.assertRaises(ValueError):
+         doc.toxml()
+
+
+ def testEmptyXMLNSValue(self):
+ # Serialise the parsed document, parse the result again, and check that
+ # the re-parsed document's namespaceURI is EMPTY_NAMESPACE.
+ doc = parseString("\n"
+ "\n")
+ doc2 = parseString(doc.toxml())
+ self.assertEqual(doc2.namespaceURI, xml.dom.EMPTY_NAMESPACE)
+
+ def testExceptionOnSpacesInXMLNSValue(self):
+ # Parsing the input below must raise ValueError or ExpatError; going by
+ # the test name, the input carries an xmlns value containing spaces.
+ with self.assertRaises((ValueError, ExpatError)):
+ parseString(
+ '' +
+ ''
+ )
+
+ def testDocRemoveChild(self):
+     # Document.removeChild() must raise NotFoundErr for a node that is not
+     # one of the document's own children, and must shrink childNodes by
+     # one when given a real child.
+     doc = parse(tstfile)
+     title_tag = doc.documentElement.getElementsByTagName("TITLE")[0]
+     with self.assertRaises(xml.dom.NotFoundErr):
+         doc.removeChild(title_tag)
+     count_before = len(doc.childNodes)
+     first_child = doc.childNodes[0]
+     doc.removeChild(first_child)
+     self.assertEqual(len(doc.childNodes), count_before - 1)
+
+ def testProcessingInstructionNameError(self):
+     # Regression check: a wrong variable in the .nodeValue property will
+     # lead to "NameError: name 'data' is not defined", so performing the
+     # assignment without an exception is the whole test.
+     owner = parse(tstfile)
+     instruction = owner.createProcessingInstruction("y", "z")
+     instruction.nodeValue = "crash"
+
+ def test_minidom_attribute_order(self):
+ # writexml() into a StringIO must reproduce the parsed input string
+ # exactly; going by the test name, the point is attribute order.
+ xml_str = ''
+ doc = parseString(xml_str)
+ output = io.StringIO()
+ doc.writexml(output)
+ self.assertEqual(output.getvalue(), xml_str)
+
+ def test_toxml_with_attributes_ordered(self):
+ # toxml() must reproduce the parsed input string exactly.
+ xml_str = ''
+ doc = parseString(xml_str)
+ self.assertEqual(doc.toxml(), xml_str)
+
+ def test_toprettyxml_with_attributes_ordered(self):
+ # toprettyxml() output is compared against a fixed two-line expected
+ # string (each line ending in a newline).
+ xml_str = ''
+ doc = parseString(xml_str)
+ self.assertEqual(doc.toprettyxml(),
+ '\n'
+ '\n')
+
+ def test_toprettyxml_with_cdata(self):
+ # toprettyxml() output for input containing a CDATA section; the
+ # expected text has the CDATA line indented with one tab.
+ xml_str = ']]>'
+ doc = parseString(xml_str)
+ self.assertEqual(doc.toprettyxml(),
+ '\n'
+ '\n'
+ '\t]]>\n'
+ '\n')
+
+ def test_cdata_parsing(self):
+ # The first child of the first 'node' element must have the same
+ # wholeText before and after a toprettyxml()/parseString() round trip.
+ xml_str = ']]>'
+ dom1 = parseString(xml_str)
+ self.checkWholeText(dom1.getElementsByTagName('node')[0].firstChild, '')
+ dom2 = parseString(dom1.toprettyxml())
+ self.checkWholeText(dom2.getElementsByTagName('node')[0].firstChild, '')
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/Lib/test/test_pulldom.py b/Lib/test/test_pulldom.py
index 1308c73be..f91fa1f8a 100644
--- a/Lib/test/test_pulldom.py
+++ b/Lib/test/test_pulldom.py
@@ -23,8 +23,8 @@ SMALL_SAMPLE = """
class PullDOMTestCase(unittest.TestCase):
- # TODO: RUSTPYTHON FileNotFoundError: [Errno 2] No such file or directory (os error 2): 'xmltestdata/test.xml' -> 'None'
- @unittest.expectedFailure
+
+ @unittest.expectedFailure # TODO: RUSTPYTHON; FileNotFoundError: [Errno 2] No such file or directory (os error 2): 'xmltestdata/test.xml' -> 'None'
def test_parse(self):
"""Minimal test of DOMEventStream.parse()"""
@@ -41,15 +41,14 @@ class PullDOMTestCase(unittest.TestCase):
with open(tstfile, "rb") as fin:
list(pulldom.parse(fin))
- # TODO: RUSTPYTHON implement DOM semantic
- @unittest.expectedFailure
+ @unittest.expectedFailure # TODO: RUSTPYTHON; implement DOM semantic
def test_parse_semantics(self):
"""Test DOMEventStream parsing semantics."""
items = pulldom.parseString(SMALL_SAMPLE)
evt, node = next(items)
# Just check the node is a Document:
- self.assertTrue(hasattr(node, "createElement"))
+ self.assertHasAttr(node, "createElement")
self.assertEqual(pulldom.START_DOCUMENT, evt)
evt, node = next(items)
self.assertEqual(pulldom.START_ELEMENT, evt)
@@ -105,8 +104,7 @@ class PullDOMTestCase(unittest.TestCase):
#evt, node = next(items)
#self.assertEqual(pulldom.END_DOCUMENT, evt)
- # TODO: RUSTPYTHON pulldom.parseString(SMALL_SAMPLE) return iterator with tuple with 2 elements
- @unittest.expectedFailure
+ @unittest.expectedFailure # TODO: RUSTPYTHON; pulldom.parseString(SMALL_SAMPLE) return iterator with tuple with 2 elements
def test_expandItem(self):
"""Ensure expandItem works as expected."""
items = pulldom.parseString(SMALL_SAMPLE)
@@ -197,7 +195,7 @@ class ThoroughTestCase(unittest.TestCase):
evt, node = next(pd)
self.assertEqual(pulldom.START_DOCUMENT, evt)
# Just check the node is a Document:
- self.assertTrue(hasattr(node, "createElement"))
+ self.assertHasAttr(node, "createElement")
if before_root:
evt, node = next(pd)
@@ -303,8 +301,7 @@ class SAX2DOMTestCase(unittest.TestCase):
def confirm(self, test, testname="Test"):
self.assertTrue(test, testname)
- # TODO: RUSTPYTHON read from stream io
- @unittest.expectedFailure
+ @unittest.expectedFailure # TODO: RUSTPYTHON; read from stream io
def test_basic(self):
"""Ensure SAX2DOM can parse from a stream."""
with io.StringIO(SMALL_SAMPLE) as fin:
diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index d360e8cd6..ec2be4d5c 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -1,14 +1,18 @@
# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
+import abc
+import functools
import os
+import re
import sys
import sysconfig
+import textwrap
import unittest
import traceback
from io import BytesIO
from test import support
-from test.support import os_helper
+from test.support import import_helper, os_helper
from xml.parsers import expat
from xml.parsers.expat import errors
@@ -261,7 +265,7 @@ class ParseTest(unittest.TestCase):
operations = out.out
self._verify_parse_output(operations)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_parse_again(self):
parser = expat.ParserCreate()
file = BytesIO(data)
@@ -282,7 +286,7 @@ class NamespaceSeparatorTest(unittest.TestCase):
expat.ParserCreate(namespace_separator=None)
expat.ParserCreate(namespace_separator=' ')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_illegal(self):
with self.assertRaisesRegex(TypeError,
r"ParserCreate\(\) argument (2|'namespace_separator') "
@@ -309,7 +313,7 @@ class NamespaceSeparatorTest(unittest.TestCase):
class InterningTest(unittest.TestCase):
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test(self):
# Test the interning machinery.
p = expat.ParserCreate()
@@ -325,7 +329,7 @@ class InterningTest(unittest.TestCase):
# L should have the same string repeated over and over.
self.assertTrue(tag is entry)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_issue9402(self):
# create an ExternalEntityParserCreate with buffer text
class ExternalOutputter:
@@ -383,7 +387,7 @@ class BufferTextTest(unittest.TestCase):
parser = expat.ParserCreate()
self.assertFalse(parser.buffer_text)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_buffering_enabled(self):
# Make sure buffering is turned on
self.assertTrue(self.parser.buffer_text)
@@ -391,7 +395,7 @@ class BufferTextTest(unittest.TestCase):
self.assertEqual(self.stuff, ['123'],
"buffered text not properly collapsed")
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test1(self):
# XXX This test exposes more detail of Expat's text chunking than we
# XXX like, but it tests what we need to concisely.
@@ -401,7 +405,7 @@ class BufferTextTest(unittest.TestCase):
["", "1", "", "2", "\n", "3", "", "4\n5"],
"buffering control not reacting as expected")
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test2(self):
self.parser.Parse(b"1<2> \n 3", True)
self.assertEqual(self.stuff, ["1<2> \n 3"],
@@ -434,7 +438,7 @@ class BufferTextTest(unittest.TestCase):
["", "1", "", "", "2", "", "", "345", ""],
"buffered text not properly split")
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test7(self):
self.setHandlers(["CommentHandler", "EndElementHandler",
"StartElementHandler"])
@@ -536,7 +540,7 @@ class PositionTest(unittest.TestCase):
class sf1296433Test(unittest.TestCase):
- @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: Expected type 'str' but 'bytes' found.
+ @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: Expected type 'str' but 'bytes' found.
def test_parse_only_xml_data(self):
# https://bugs.python.org/issue1296433
#
@@ -560,15 +564,15 @@ class ChardataBufferTest(unittest.TestCase):
test setting of chardata buffer size
"""
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_1025_bytes(self):
self.assertEqual(self.small_buffer_test(1025), 2)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_1000_bytes(self):
self.assertEqual(self.small_buffer_test(1000), 1)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_wrong_size(self):
parser = expat.ParserCreate()
parser.buffer_text = 1
@@ -581,7 +585,7 @@ class ChardataBufferTest(unittest.TestCase):
with self.assertRaises(TypeError):
parser.buffer_size = 512.0
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_unchanged_size(self):
xml1 = b"" + b'a' * 512
xml2 = b'a'*512 + b''
@@ -605,7 +609,7 @@ class ChardataBufferTest(unittest.TestCase):
self.assertEqual(self.n, 2)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_disabling_buffer(self):
xml1 = b"" + b'a' * 512
xml2 = b'b' * 1024
@@ -650,7 +654,7 @@ class ChardataBufferTest(unittest.TestCase):
parser.Parse(xml)
return self.n
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_change_size_1(self):
xml1 = b"" + b'a' * 1024
xml2 = b'aaa' + b'a' * 1025 + b''
@@ -667,7 +671,7 @@ class ChardataBufferTest(unittest.TestCase):
parser.Parse(xml2, True)
self.assertEqual(self.n, 2)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_change_size_2(self):
xml1 = b"a" + b'a' * 1023
xml2 = b'aaa' + b'a' * 1025 + b''
@@ -684,8 +688,25 @@ class ChardataBufferTest(unittest.TestCase):
parser.Parse(xml2, True)
self.assertEqual(self.n, 4)
+class ElementDeclHandlerTest(unittest.TestCase):
+ def test_trigger_leak(self):
+ # Unfixed, this test would leak the memory of the so-called
+ # "content model" in function ``my_ElementDeclHandler`` of pyexpat.
+ # See https://github.com/python/cpython/issues/140593.
+ data = textwrap.dedent('''\
+
+ ]>
+
+ ''').encode('UTF-8')
+
+ parser = expat.ParserCreate()
+ parser.NotStandaloneHandler = lambda: 1.234 # arbitrary float
+ parser.ElementDeclHandler = lambda _1, _2: None
+ self.assertRaises(TypeError, parser.Parse, data, True)
+
class MalformedInputTest(unittest.TestCase):
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test1(self):
xml = b"\0\r\n"
parser = expat.ParserCreate()
@@ -695,7 +716,7 @@ class MalformedInputTest(unittest.TestCase):
except expat.ExpatError as e:
self.assertEqual(str(e), 'unclosed token: line 2, column 0')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test2(self):
# \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE)
xml = b"\r\n"
@@ -705,13 +726,13 @@ class MalformedInputTest(unittest.TestCase):
parser.Parse(xml, True)
class ErrorMessageTest(unittest.TestCase):
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_codes(self):
# verify mapping of errors.codes and errors.messages
self.assertEqual(errors.XML_ERROR_SYNTAX,
errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]])
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_expaterror(self):
xml = b'<'
parser = expat.ParserCreate()
@@ -727,7 +748,7 @@ class ForeignDTDTests(unittest.TestCase):
"""
Tests for the UseForeignDTD method of expat parser objects.
"""
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_use_foreign_dtd(self):
"""
If UseForeignDTD is passed True and a document without an external
@@ -756,7 +777,7 @@ class ForeignDTDTests(unittest.TestCase):
parser.Parse(b"")
self.assertEqual(handler_call_args, [(None, None)])
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_ignore_use_foreign_dtd(self):
"""
If UseForeignDTD is passed True and a document with an external
@@ -785,7 +806,7 @@ class ParentParserLifetimeTest(unittest.TestCase):
See https://github.com/python/cpython/issues/139400.
"""
- @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'ExternalEntityParserCreate'
+ @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'ExternalEntityParserCreate'
def test_parent_parser_outlives_its_subparsers__single(self):
parser = expat.ParserCreate()
subparser = parser.ExternalEntityParserCreate(None)
@@ -794,7 +815,7 @@ class ParentParserLifetimeTest(unittest.TestCase):
# while it's still being referenced by a related subparser.
del parser
- @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'ExternalEntityParserCreate'
+ @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'ExternalEntityParserCreate'
def test_parent_parser_outlives_its_subparsers__multiple(self):
parser = expat.ParserCreate()
subparser_one = parser.ExternalEntityParserCreate(None)
@@ -804,7 +825,7 @@ class ParentParserLifetimeTest(unittest.TestCase):
# while it's still being referenced by a related subparser.
del parser
- @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'ExternalEntityParserCreate'
+ @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'ExternalEntityParserCreate'
def test_parent_parser_outlives_its_subparsers__chain(self):
parser = expat.ParserCreate()
subparser = parser.ExternalEntityParserCreate(None)
@@ -817,7 +838,7 @@ class ParentParserLifetimeTest(unittest.TestCase):
class ReparseDeferralTest(unittest.TestCase):
- @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'GetReparseDeferralEnabled'
+ @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'GetReparseDeferralEnabled'
def test_getter_setter_round_trip(self):
parser = expat.ParserCreate()
enabled = (expat.version_info >= (2, 6, 0))
@@ -828,7 +849,7 @@ class ReparseDeferralTest(unittest.TestCase):
parser.SetReparseDeferralEnabled(True)
self.assertIs(parser.GetReparseDeferralEnabled(), enabled)
- @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'GetReparseDeferralEnabled'
+ @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'GetReparseDeferralEnabled'
def test_reparse_deferral_enabled(self):
if expat.version_info < (2, 6, 0):
self.skipTest(f'Expat {expat.version_info} does not '
@@ -853,7 +874,7 @@ class ReparseDeferralTest(unittest.TestCase):
self.assertEqual(started, ['doc'])
- @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'SetReparseDeferralEnabled'
+ @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'xmlparser' object has no attribute 'SetReparseDeferralEnabled'
def test_reparse_deferral_disabled(self):
started = []
@@ -873,5 +894,199 @@ class ReparseDeferralTest(unittest.TestCase):
self.assertEqual(started, ['doc'])
+class AttackProtectionTestBase(abc.ABC):
+ """
+ Base class for testing protections against XML payloads with
+ disproportionate amplification.
+
+ The protections being tested should detect and prevent attacks
+ that leverage disproportionate amplification from small inputs.
+ """
+
+ @staticmethod
+ def exponential_expansion_payload(*, nrows, ncols, text='.'):
+ """Create a billion laughs attack payload.
+
+ Be careful: the number of total items is pow(ncols, nrows), thereby
+ requiring at least pow(ncols, nrows) * sizeof(text) memory!
+ """
+ template = textwrap.dedent(f"""\
+
+
+
+ {{body}}
+ ]>
+ &row{nrows};
+ """).rstrip()
+
+ body = '\n'.join(
+ f''
+ for i in range(nrows)
+ )
+ body = textwrap.indent(body, ' ' * 4)
+ return template.format(body=body)
+
+ def test_payload_generation(self):
+ # self-test for exponential_expansion_payload()
+ payload = self.exponential_expansion_payload(nrows=2, ncols=3)
+ self.assertEqual(payload, textwrap.dedent("""\
+
+
+
+
+
+ ]>
+ &row2;
+ """).rstrip())
+
+ def assert_root_parser_failure(self, func, /, *args, **kwargs):
+ """Check that func(*args, **kwargs) is invalid for a sub-parser."""
+ msg = "parser must be a root parser"
+ self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs)
+
+ @abc.abstractmethod
+ def assert_rejected(self, func, /, *args, **kwargs):
+ """Assert that func(*args, **kwargs) triggers the attack protection.
+
+ Note: this method must ensure that the attack protection being tested
+ is the one that is actually triggered at runtime, e.g., by matching
+ the exact error message.
+ """
+
+ @abc.abstractmethod
+ def set_activation_threshold(self, parser, threshold):
+ """Set the activation threshold for the tested protection."""
+
+ @abc.abstractmethod
+ def set_maximum_amplification(self, parser, max_factor):
+ """Set the maximum amplification factor for the tested protection."""
+
+ @abc.abstractmethod
+ def test_set_activation_threshold__threshold_reached(self):
+ """Test when the activation threshold is exceeded."""
+
+ @abc.abstractmethod
+ def test_set_activation_threshold__threshold_not_reached(self):
+ """Test when the activation threshold is not exceeded."""
+
+ def test_set_activation_threshold__invalid_threshold_type(self):
+ parser = expat.ParserCreate()
+ setter = functools.partial(self.set_activation_threshold, parser)
+
+ self.assertRaises(TypeError, setter, 1.0)
+ self.assertRaises(TypeError, setter, -1.5)
+ self.assertRaises(ValueError, setter, -5)
+
+ def test_set_activation_threshold__invalid_threshold_range(self):
+ _testcapi = import_helper.import_module("_testcapi")
+ parser = expat.ParserCreate()
+ setter = functools.partial(self.set_activation_threshold, parser)
+
+ self.assertRaises(OverflowError, setter, _testcapi.ULLONG_MAX + 1)
+
+ def test_set_activation_threshold__fail_for_subparser(self):
+ parser = expat.ParserCreate()
+ subparser = parser.ExternalEntityParserCreate(None)
+ setter = functools.partial(self.set_activation_threshold, subparser)
+ self.assert_root_parser_failure(setter, 12345)
+
+ @abc.abstractmethod
+ def test_set_maximum_amplification__amplification_exceeded(self):
+ """Test when the amplification factor is exceeded."""
+
+ @abc.abstractmethod
+ def test_set_maximum_amplification__amplification_not_exceeded(self):
+ """Test when the amplification factor is not exceeded."""
+
+ def test_set_maximum_amplification__infinity(self):
+ inf = float('inf') # an 'inf' maximum factor is allowed by Expat
+ parser = expat.ParserCreate()
+ self.assertIsNone(self.set_maximum_amplification(parser, inf))
+
+ def test_set_maximum_amplification__invalid_max_factor_type(self):
+ parser = expat.ParserCreate()
+ setter = functools.partial(self.set_maximum_amplification, parser)
+
+ self.assertRaises(TypeError, setter, None)
+ self.assertRaises(TypeError, setter, 'abc')
+
+ def test_set_maximum_amplification__invalid_max_factor_range(self):
+ parser = expat.ParserCreate()
+ setter = functools.partial(self.set_maximum_amplification, parser)
+
+ msg = re.escape("'max_factor' must be at least 1.0")
+ self.assertRaisesRegex(expat.ExpatError, msg, setter, float('nan'))
+ self.assertRaisesRegex(expat.ExpatError, msg, setter, 0.99)
+
+ def test_set_maximum_amplification__fail_for_subparser(self):
+ parser = expat.ParserCreate()
+ subparser = parser.ExternalEntityParserCreate(None)
+ setter = functools.partial(self.set_maximum_amplification, subparser)
+ self.assert_root_parser_failure(setter, 123.45)
+
+
+@unittest.skipIf(expat.version_info < (2, 7, 2), "requires Expat >= 2.7.2")
+class MemoryProtectionTest(AttackProtectionTestBase, unittest.TestCase):
+
+ # NOTE: with the default Expat configuration, the billion laughs protection
+ # may hit before the allocation limiter if exponential_expansion_payload()
+ # is not carefully parametrized. As such, the payloads should be chosen so
+ # that either the allocation limiter is hit before other protections are
+ # triggered or no protection at all is triggered.
+
+ def assert_rejected(self, func, /, *args, **kwargs):
+ """Check that func(*args, **kwargs) hits the allocation limit."""
+ msg = r"out of memory: line \d+, column \d+"
+ self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs)
+
+ def set_activation_threshold(self, parser, threshold):
+ return parser.SetAllocTrackerActivationThreshold(threshold)
+
+ def set_maximum_amplification(self, parser, max_factor):
+ return parser.SetAllocTrackerMaximumAmplification(max_factor)
+
+ def test_set_activation_threshold__threshold_reached(self):
+ parser = expat.ParserCreate()
+ # Choose a threshold expected to be always reached.
+ self.set_activation_threshold(parser, 3)
+ # Check that the threshold is reached by choosing a small factor
+ # and a payload whose peak amplification factor exceeds it.
+ self.assertIsNone(self.set_maximum_amplification(parser, 1.0))
+ payload = self.exponential_expansion_payload(ncols=10, nrows=4)
+ self.assert_rejected(parser.Parse, payload, True)
+
+ def test_set_activation_threshold__threshold_not_reached(self):
+ parser = expat.ParserCreate()
+ # Choose a threshold expected to be never reached.
+ self.set_activation_threshold(parser, pow(10, 5))
+ # Check that the threshold is not reached: even with a small factor
+ # and a payload whose peak amplification would exceed it, parsing succeeds.
+ self.assertIsNone(self.set_maximum_amplification(parser, 1.0))
+ payload = self.exponential_expansion_payload(ncols=10, nrows=4)
+ self.assertIsNotNone(parser.Parse(payload, True))
+
+ def test_set_maximum_amplification__amplification_exceeded(self):
+ parser = expat.ParserCreate()
+ # Unconditionally enable the maximum amplification check (threshold 0).
+ self.set_activation_threshold(parser, 0)
+ # Choose a max amplification factor expected to always be exceeded.
+ self.assertIsNone(self.set_maximum_amplification(parser, 1.0))
+ # Craft a payload for which the peak amplification factor is > 1.0.
+ payload = self.exponential_expansion_payload(ncols=1, nrows=2)
+ self.assert_rejected(parser.Parse, payload, True)
+
+ def test_set_maximum_amplification__amplification_not_exceeded(self):
+ parser = expat.ParserCreate()
+ # Unconditionally enable the maximum amplification check (threshold 0).
+ self.set_activation_threshold(parser, 0)
+ # Choose a max amplification factor expected to never be exceeded.
+ self.assertIsNone(self.set_maximum_amplification(parser, 1e4))
+ # Craft a payload for which the peak amplification factor is < 1e4.
+ payload = self.exponential_expansion_payload(ncols=1, nrows=2)
+ self.assertIsNotNone(parser.Parse(payload, True))
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
new file mode 100644
index 000000000..5c10bcedc
--- /dev/null
+++ b/Lib/test/test_sax.py
@@ -0,0 +1,1577 @@
+# regression test for SAX 2.0
+
+from xml.sax import make_parser, ContentHandler, \
+ SAXException, SAXReaderNotAvailable, SAXParseException
+import unittest
+from unittest import mock
+try:
+ make_parser()
+except SAXReaderNotAvailable:
+ # don't try to test this module if we cannot create a parser
+ raise unittest.SkipTest("no XML parsers available")
+from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
+ XMLFilterBase, prepare_input_source
+from xml.sax.expatreader import create_parser
+from xml.sax.handler import (feature_namespaces, feature_external_ges,
+ LexicalHandler)
+from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
+from xml import sax
+from io import BytesIO, StringIO
+import codecs
+import os.path
+import pyexpat
+import shutil
+import sys
+from urllib.error import URLError
+import urllib.request
+from test.support import os_helper
+from test.support import findfile, check__all__
+from test.support.os_helper import FakePath, TESTFN
+
+
+TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
+TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
+try:
+ TEST_XMLFILE.encode("utf-8")
+ TEST_XMLFILE_OUT.encode("utf-8")
+except UnicodeEncodeError:
+ raise unittest.SkipTest("filename is not encodable to utf8")
+
+supports_nonascii_filenames = True
+if not os.path.supports_unicode_filenames:
+ try:
+ os_helper.TESTFN_UNICODE.encode(sys.getfilesystemencoding())
+ except (UnicodeError, TypeError):
+ # Either the file system encoding is None, or the file name
+ # cannot be encoded in the file system encoding.
+ supports_nonascii_filenames = False
+requires_nonascii_filenames = unittest.skipUnless(
+ supports_nonascii_filenames,
+ 'Requires non-ascii filenames support')
+
+ns_uri = "http://www.python.org/xml-ns/saxtest/"
+
+class XmlTestBase(unittest.TestCase):
+ def verify_empty_attrs(self, attrs):
+ self.assertRaises(KeyError, attrs.getValue, "attr")
+ self.assertRaises(KeyError, attrs.getValueByQName, "attr")
+ self.assertRaises(KeyError, attrs.getNameByQName, "attr")
+ self.assertRaises(KeyError, attrs.getQNameByName, "attr")
+ self.assertRaises(KeyError, attrs.__getitem__, "attr")
+ self.assertEqual(attrs.getLength(), 0)
+ self.assertEqual(attrs.getNames(), [])
+ self.assertEqual(attrs.getQNames(), [])
+ self.assertEqual(len(attrs), 0)
+ self.assertNotIn("attr", attrs)
+ self.assertEqual(list(attrs.keys()), [])
+ self.assertEqual(attrs.get("attrs"), None)
+ self.assertEqual(attrs.get("attrs", 25), 25)
+ self.assertEqual(list(attrs.items()), [])
+ self.assertEqual(list(attrs.values()), [])
+
+ def verify_empty_nsattrs(self, attrs):
+ self.assertRaises(KeyError, attrs.getValue, (ns_uri, "attr"))
+ self.assertRaises(KeyError, attrs.getValueByQName, "ns:attr")
+ self.assertRaises(KeyError, attrs.getNameByQName, "ns:attr")
+ self.assertRaises(KeyError, attrs.getQNameByName, (ns_uri, "attr"))
+ self.assertRaises(KeyError, attrs.__getitem__, (ns_uri, "attr"))
+ self.assertEqual(attrs.getLength(), 0)
+ self.assertEqual(attrs.getNames(), [])
+ self.assertEqual(attrs.getQNames(), [])
+ self.assertEqual(len(attrs), 0)
+ self.assertNotIn((ns_uri, "attr"), attrs)
+ self.assertEqual(list(attrs.keys()), [])
+ self.assertEqual(attrs.get((ns_uri, "attr")), None)
+ self.assertEqual(attrs.get((ns_uri, "attr"), 25), 25)
+ self.assertEqual(list(attrs.items()), [])
+ self.assertEqual(list(attrs.values()), [])
+
+ def verify_attrs_wattr(self, attrs):
+ self.assertEqual(attrs.getLength(), 1)
+ self.assertEqual(attrs.getNames(), ["attr"])
+ self.assertEqual(attrs.getQNames(), ["attr"])
+ self.assertEqual(len(attrs), 1)
+ self.assertIn("attr", attrs)
+ self.assertEqual(list(attrs.keys()), ["attr"])
+ self.assertEqual(attrs.get("attr"), "val")
+ self.assertEqual(attrs.get("attr", 25), "val")
+ self.assertEqual(list(attrs.items()), [("attr", "val")])
+ self.assertEqual(list(attrs.values()), ["val"])
+ self.assertEqual(attrs.getValue("attr"), "val")
+ self.assertEqual(attrs.getValueByQName("attr"), "val")
+ self.assertEqual(attrs.getNameByQName("attr"), "attr")
+ self.assertEqual(attrs["attr"], "val")
+ self.assertEqual(attrs.getQNameByName("attr"), "attr")
+
+
+def xml_str(doc, encoding=None):
+ if encoding is None:
+ return doc
+ return '\n%s' % (encoding, doc)
+
+def xml_bytes(doc, encoding, decl_encoding=...):
+ if decl_encoding is ...:
+ decl_encoding = encoding
+ return xml_str(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace')
+
+def make_xml_file(doc, encoding, decl_encoding=...):
+ if decl_encoding is ...:
+ decl_encoding = encoding
+ with open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f:
+ f.write(xml_str(doc, decl_encoding))
+
+
+class ParseTest(unittest.TestCase):
+ data = '$\xa3\u20ac\U0001017b'
+
+ def tearDown(self):
+ os_helper.unlink(TESTFN)
+
+ def check_parse(self, f):
+ from xml.sax import parse
+ result = StringIO()
+ parse(f, XMLGenerator(result, 'utf-8'))
+ self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))
+
+ def test_parse_text(self):
+ encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
+ 'utf-16', 'utf-16le', 'utf-16be')
+ for encoding in encodings:
+ self.check_parse(StringIO(xml_str(self.data, encoding)))
+ make_xml_file(self.data, encoding)
+ with open(TESTFN, 'r', encoding=encoding) as f:
+ self.check_parse(f)
+ self.check_parse(StringIO(self.data))
+ make_xml_file(self.data, encoding, None)
+ with open(TESTFN, 'r', encoding=encoding) as f:
+ self.check_parse(f)
+
+ def test_parse_bytes(self):
+ # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
+ # UTF-16 is autodetected
+ encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
+ for encoding in encodings:
+ self.check_parse(BytesIO(xml_bytes(self.data, encoding)))
+ make_xml_file(self.data, encoding)
+ self.check_parse(TESTFN)
+ with open(TESTFN, 'rb') as f:
+ self.check_parse(f)
+ self.check_parse(BytesIO(xml_bytes(self.data, encoding, None)))
+ make_xml_file(self.data, encoding, None)
+ self.check_parse(TESTFN)
+ with open(TESTFN, 'rb') as f:
+ self.check_parse(f)
+ # accept UTF-8 with BOM
+ self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8')))
+ make_xml_file(self.data, 'utf-8-sig', 'utf-8')
+ self.check_parse(TESTFN)
+ with open(TESTFN, 'rb') as f:
+ self.check_parse(f)
+ self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', None)))
+ make_xml_file(self.data, 'utf-8-sig', None)
+ self.check_parse(TESTFN)
+ with open(TESTFN, 'rb') as f:
+ self.check_parse(f)
+ # accept data with declared encoding
+ self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1')))
+ make_xml_file(self.data, 'iso-8859-1')
+ self.check_parse(TESTFN)
+ with open(TESTFN, 'rb') as f:
+ self.check_parse(f)
+ # fail on data that is not UTF-8 compatible and has no declared encoding
+ with self.assertRaises(SAXException):
+ self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
+ make_xml_file(self.data, 'iso-8859-1', None)
+ with self.assertRaises(SAXException):
+ self.check_parse(TESTFN)
+ with open(TESTFN, 'rb') as f:
+ with self.assertRaises(SAXException):
+ self.check_parse(f)
+
+ def test_parse_path_object(self):
+ make_xml_file(self.data, 'utf-8', None)
+ self.check_parse(FakePath(TESTFN))
+
+ def test_parse_InputSource(self):
+ # accept data without a declared encoding but with an explicitly specified one
+ make_xml_file(self.data, 'iso-8859-1', None)
+ with open(TESTFN, 'rb') as f:
+ input = InputSource()
+ input.setByteStream(f)
+ input.setEncoding('iso-8859-1')
+ self.check_parse(input)
+
+ def test_parse_close_source(self):
+ builtin_open = open
+ fileobj = None
+
+ def mock_open(*args):
+ nonlocal fileobj
+ fileobj = builtin_open(*args)
+ return fileobj
+
+ with mock.patch('xml.sax.saxutils.open', side_effect=mock_open):
+ make_xml_file(self.data, 'iso-8859-1', None)
+ with self.assertRaises(SAXException):
+ self.check_parse(TESTFN)
+ self.assertTrue(fileobj.closed)
+
+ def check_parseString(self, s):
+ from xml.sax import parseString
+ result = StringIO()
+ parseString(s, XMLGenerator(result, 'utf-8'))
+ self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))
+
+ def test_parseString_text(self):
+ encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
+ 'utf-16', 'utf-16le', 'utf-16be')
+ for encoding in encodings:
+ self.check_parseString(xml_str(self.data, encoding))
+ self.check_parseString(self.data)
+
+ def test_parseString_bytes(self):
+ # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
+ # UTF-16 is autodetected
+ encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
+ for encoding in encodings:
+ self.check_parseString(xml_bytes(self.data, encoding))
+ self.check_parseString(xml_bytes(self.data, encoding, None))
+ # accept UTF-8 with BOM
+ self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))
+ self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None))
+ # accept data with declared encoding
+ self.check_parseString(xml_bytes(self.data, 'iso-8859-1'))
+ # fail on data that is not UTF-8 compatible and has no declared encoding
+ with self.assertRaises(SAXException):
+ self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None))
+
+class MakeParserTest(unittest.TestCase):
+ def test_make_parser2(self):
+ # Creating parsers several times in a row should succeed.
+ # Testing this because there have been failures of this kind
+ # before.
+ from xml.sax import make_parser
+ p = make_parser()
+ from xml.sax import make_parser
+ p = make_parser()
+ from xml.sax import make_parser
+ p = make_parser()
+ from xml.sax import make_parser
+ p = make_parser()
+ from xml.sax import make_parser
+ p = make_parser()
+ from xml.sax import make_parser
+ p = make_parser()
+
+ def test_make_parser3(self):
+ # Testing that make_parser can handle different types of
+ # iterables.
+ make_parser(['module'])
+ make_parser(('module', ))
+ make_parser({'module'})
+ make_parser(frozenset({'module'}))
+ make_parser({'module': None})
+ make_parser(iter(['module']))
+
+ def test_make_parser4(self):
+ # Testing that make_parser can handle empty iterables.
+ make_parser([])
+ make_parser(tuple())
+ make_parser(set())
+ make_parser(frozenset())
+ make_parser({})
+ make_parser(iter([]))
+
+ def test_make_parser5(self):
+ # Testing that make_parser can handle iterables with more than
+ # one item.
+ make_parser(['module1', 'module2'])
+ make_parser(('module1', 'module2'))
+ make_parser({'module1', 'module2'})
+ make_parser(frozenset({'module1', 'module2'}))
+ make_parser({'module1': None, 'module2': None})
+ make_parser(iter(['module1', 'module2']))
+
+# ===========================================================================
+#
+# saxutils tests
+#
+# ===========================================================================
+
+class SaxutilsTest(unittest.TestCase):
+ # ===== escape
+ def test_escape_basic(self):
+ self.assertEqual(escape("Donald Duck & Co"), "Donald Duck & Co")
+
+ def test_escape_all(self):
+ self.assertEqual(escape(""),
+ "<Donald Duck & Co>")
+
+ def test_escape_extra(self):
+ self.assertEqual(escape("Hei på deg", {"å" : "å"}),
+ "Hei på deg")
+
+ # ===== unescape
+ def test_unescape_basic(self):
+ self.assertEqual(unescape("Donald Duck & Co"), "Donald Duck & Co")
+
+ def test_unescape_all(self):
+ self.assertEqual(unescape("<Donald Duck & Co>"),
+ "")
+
+ def test_unescape_extra(self):
+ self.assertEqual(unescape("Hei på deg", {"å" : "å"}),
+ "Hei på deg")
+
+ def test_unescape_amp_extra(self):
+ self.assertEqual(unescape("&foo;", {"&foo;": "splat"}), "&foo;")
+
+ # ===== quoteattr
+ def test_quoteattr_basic(self):
+ self.assertEqual(quoteattr("Donald Duck & Co"),
+ '"Donald Duck & Co"')
+
+ def test_single_quoteattr(self):
+ self.assertEqual(quoteattr('Includes "double" quotes'),
+ '\'Includes "double" quotes\'')
+
+ def test_double_quoteattr(self):
+ self.assertEqual(quoteattr("Includes 'single' quotes"),
+ "\"Includes 'single' quotes\"")
+
+ def test_single_double_quoteattr(self):
+ self.assertEqual(quoteattr("Includes 'single' and \"double\" quotes"),
+ "\"Includes 'single' and "double" quotes\"")
+
+ # ===== make_parser
+ def test_make_parser(self):
+ # Creating a parser should succeed - it should fall back
+ # to the expatreader
+ p = make_parser(['xml.parsers.no_such_parser'])
+
+
+class PrepareInputSourceTest(unittest.TestCase):
+
+ def setUp(self):
+ self.file = os_helper.TESTFN
+ with open(self.file, "w") as tmp:
+ tmp.write("This was read from a file.")
+
+ def tearDown(self):
+ os_helper.unlink(self.file)
+
+ def make_byte_stream(self):
+ return BytesIO(b"This is a byte stream.")
+
+ def make_character_stream(self):
+ return StringIO("This is a character stream.")
+
+ def checkContent(self, stream, content):
+ self.assertIsNotNone(stream)
+ self.assertEqual(stream.read(), content)
+ stream.close()
+
+
+ def test_character_stream(self):
+ # If the source is an InputSource with a character stream, use it.
+ src = InputSource(self.file)
+ src.setCharacterStream(self.make_character_stream())
+ prep = prepare_input_source(src)
+ self.assertIsNone(prep.getByteStream())
+ self.checkContent(prep.getCharacterStream(),
+ "This is a character stream.")
+
+ def test_byte_stream(self):
+ # If the source is an InputSource that does not have a character
+ # stream but does have a byte stream, use the byte stream.
+ src = InputSource(self.file)
+ src.setByteStream(self.make_byte_stream())
+ prep = prepare_input_source(src)
+ self.assertIsNone(prep.getCharacterStream())
+ self.checkContent(prep.getByteStream(),
+ b"This is a byte stream.")
+
+ def test_system_id(self):
+ # If the source is an InputSource that has neither a character
+ # stream nor a byte stream, open the system ID.
+ src = InputSource(self.file)
+ prep = prepare_input_source(src)
+ self.assertIsNone(prep.getCharacterStream())
+ self.checkContent(prep.getByteStream(),
+ b"This was read from a file.")
+
+ def test_string(self):
+ # If the source is a string, use it as a system ID and open it.
+ prep = prepare_input_source(self.file)
+ self.assertIsNone(prep.getCharacterStream())
+ self.checkContent(prep.getByteStream(),
+ b"This was read from a file.")
+
+ def test_path_objects(self):
+ # If the source is a Path object, use it as a system ID and open it.
+ prep = prepare_input_source(FakePath(self.file))
+ self.assertIsNone(prep.getCharacterStream())
+ self.checkContent(prep.getByteStream(),
+ b"This was read from a file.")
+
+ def test_binary_file(self):
+ # If the source is a binary file-like object, use it as a byte
+ # stream.
+ prep = prepare_input_source(self.make_byte_stream())
+ self.assertIsNone(prep.getCharacterStream())
+ self.checkContent(prep.getByteStream(),
+ b"This is a byte stream.")
+
+ def test_text_file(self):
+ # If the source is a text file-like object, use it as a character
+ # stream.
+ prep = prepare_input_source(self.make_character_stream())
+ self.assertIsNone(prep.getByteStream())
+ self.checkContent(prep.getCharacterStream(),
+ "This is a character stream.")
+
+
+# ===== XMLGenerator
+
+class XmlgenTest:
+ def test_xmlgen_basic(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result)
+ gen.startDocument()
+ gen.startElement("doc", {})
+ gen.endElement("doc")
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(), self.xml(""))
+
+ def test_xmlgen_basic_empty(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result, short_empty_elements=True)
+ gen.startDocument()
+ gen.startElement("doc", {})
+ gen.endElement("doc")
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(), self.xml(""))
+
+ def test_xmlgen_content(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result)
+
+ gen.startDocument()
+ gen.startElement("doc", {})
+ gen.characters("huhei")
+ gen.endElement("doc")
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(), self.xml("huhei"))
+
+ def test_xmlgen_content_empty(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result, short_empty_elements=True)
+
+ gen.startDocument()
+ gen.startElement("doc", {})
+ gen.characters("huhei")
+ gen.endElement("doc")
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(), self.xml("huhei"))
+
+ def test_xmlgen_pi(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result)
+
+ gen.startDocument()
+ gen.processingInstruction("test", "data")
+ gen.startElement("doc", {})
+ gen.endElement("doc")
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(),
+ self.xml(""))
+
+ def test_xmlgen_content_escape(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result)
+
+ gen.startDocument()
+ gen.startElement("doc", {})
+ gen.characters("<huhei&"))
+
+ def test_xmlgen_attr_escape(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result)
+
+ gen.startDocument()
+ gen.startElement("doc", {"a": '"'})
+ gen.startElement("e", {"a": "'"})
+ gen.endElement("e")
+ gen.startElement("e", {"a": "'\""})
+ gen.endElement("e")
+ gen.startElement("e", {"a": "\n\r\t"})
+ gen.endElement("e")
+ gen.endElement("doc")
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(), self.xml(
+ ""
+ ""
+ ""))
+
+ def test_xmlgen_encoding(self):
+ encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig',
+ 'utf-16', 'utf-16be', 'utf-16le',
+ 'utf-32', 'utf-32be', 'utf-32le')
+ for encoding in encodings:
+ result = self.ioclass()
+ gen = XMLGenerator(result, encoding=encoding)
+
+ gen.startDocument()
+ gen.startElement("doc", {"a": '\u20ac'})
+ gen.characters("\u20ac")
+ gen.endElement("doc")
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(),
+ self.xml('\u20ac', encoding=encoding))
+
+ def test_xmlgen_unencodable(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result, encoding='ascii')
+
+ gen.startDocument()
+ gen.startElement("doc", {"a": '\u20ac'})
+ gen.characters("\u20ac")
+ gen.endElement("doc")
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(),
+ self.xml('€', encoding='ascii'))
+
+ def test_xmlgen_ignorable(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result)
+
+ gen.startDocument()
+ gen.startElement("doc", {})
+ gen.ignorableWhitespace(" ")
+ gen.endElement("doc")
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(), self.xml(" "))
+
+ def test_xmlgen_ignorable_empty(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result, short_empty_elements=True)
+
+ gen.startDocument()
+ gen.startElement("doc", {})
+ gen.ignorableWhitespace(" ")
+ gen.endElement("doc")
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(), self.xml(" "))
+
+ def test_xmlgen_encoding_bytes(self):
+ encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig',
+ 'utf-16', 'utf-16be', 'utf-16le',
+ 'utf-32', 'utf-32be', 'utf-32le')
+ for encoding in encodings:
+ result = self.ioclass()
+ gen = XMLGenerator(result, encoding=encoding)
+
+ gen.startDocument()
+ gen.startElement("doc", {"a": '\u20ac'})
+ gen.characters("\u20ac".encode(encoding))
+ gen.ignorableWhitespace(" ".encode(encoding))
+ gen.endElement("doc")
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(),
+ self.xml('\u20ac ', encoding=encoding))
+
+ def test_xmlgen_ns(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result)
+
+ gen.startDocument()
+ gen.startPrefixMapping("ns1", ns_uri)
+ gen.startElementNS((ns_uri, "doc"), "ns1:doc", {})
+ # add an unqualified name
+ gen.startElementNS((None, "udoc"), None, {})
+ gen.endElementNS((None, "udoc"), None)
+ gen.endElementNS((ns_uri, "doc"), "ns1:doc")
+ gen.endPrefixMapping("ns1")
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(), self.xml(
+ '' %
+ ns_uri))
+
+ def test_xmlgen_ns_empty(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result, short_empty_elements=True)
+
+ gen.startDocument()
+ gen.startPrefixMapping("ns1", ns_uri)
+ gen.startElementNS((ns_uri, "doc"), "ns1:doc", {})
+ # add an unqualified name
+ gen.startElementNS((None, "udoc"), None, {})
+ gen.endElementNS((None, "udoc"), None)
+ gen.endElementNS((ns_uri, "doc"), "ns1:doc")
+ gen.endPrefixMapping("ns1")
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(), self.xml(
+ '' %
+ ns_uri))
+
+ def test_1463026_1(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result)
+
+ gen.startDocument()
+ gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'})
+ gen.endElementNS((None, 'a'), 'a')
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(), self.xml(''))
+
+ def test_1463026_1_empty(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result, short_empty_elements=True)
+
+ gen.startDocument()
+ gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'})
+ gen.endElementNS((None, 'a'), 'a')
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(), self.xml(''))
+
+ def test_1463026_2(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result)
+
+ gen.startDocument()
+ gen.startPrefixMapping(None, 'qux')
+ gen.startElementNS(('qux', 'a'), 'a', {})
+ gen.endElementNS(('qux', 'a'), 'a')
+ gen.endPrefixMapping(None)
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(), self.xml(''))
+
+ def test_1463026_2_empty(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result, short_empty_elements=True)
+
+ gen.startDocument()
+ gen.startPrefixMapping(None, 'qux')
+ gen.startElementNS(('qux', 'a'), 'a', {})
+ gen.endElementNS(('qux', 'a'), 'a')
+ gen.endPrefixMapping(None)
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(), self.xml(''))
+
+ def test_1463026_3(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result)
+
+ gen.startDocument()
+ gen.startPrefixMapping('my', 'qux')
+ gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'})
+ gen.endElementNS(('qux', 'a'), 'a')
+ gen.endPrefixMapping('my')
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(),
+ self.xml(''))
+
+ def test_1463026_3_empty(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result, short_empty_elements=True)
+
+ gen.startDocument()
+ gen.startPrefixMapping('my', 'qux')
+ gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'})
+ gen.endElementNS(('qux', 'a'), 'a')
+ gen.endPrefixMapping('my')
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(),
+ self.xml(''))
+
+ def test_5027_1(self):
+ # The xml prefix (as in xml:lang below) is reserved and bound by
+ # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had
+ # a bug whereby a KeyError is raised because this namespace is missing
+ # from a dictionary.
+ #
+ # This test demonstrates the bug by parsing a document.
+ test_xml = StringIO(
+ ''
+ ''
+ 'Hello'
+ '')
+
+ parser = make_parser()
+ parser.setFeature(feature_namespaces, True)
+ result = self.ioclass()
+ gen = XMLGenerator(result)
+ parser.setContentHandler(gen)
+ parser.parse(test_xml)
+
+ self.assertEqual(result.getvalue(),
+ self.xml(
+ ''
+ 'Hello'
+ ''))
+
+ def test_5027_2(self):
+ # The xml prefix (as in xml:lang below) is reserved and bound by
+ # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had
+ # a bug whereby a KeyError is raised because this namespace is missing
+ # from a dictionary.
+ #
+ # This test demonstrates the bug by direct manipulation of the
+ # XMLGenerator.
+ result = self.ioclass()
+ gen = XMLGenerator(result)
+
+ gen.startDocument()
+ gen.startPrefixMapping('a', 'http://example.com/ns')
+ gen.startElementNS(('http://example.com/ns', 'g1'), 'g1', {})
+ lang_attr = {('http://www.w3.org/XML/1998/namespace', 'lang'): 'en'}
+ gen.startElementNS(('http://example.com/ns', 'g2'), 'g2', lang_attr)
+ gen.characters('Hello')
+ gen.endElementNS(('http://example.com/ns', 'g2'), 'g2')
+ gen.endElementNS(('http://example.com/ns', 'g1'), 'g1')
+ gen.endPrefixMapping('a')
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(),
+ self.xml(
+ ''
+ 'Hello'
+ ''))
+
+ def test_no_close_file(self):
+ result = self.ioclass()
+ def func(out):
+ gen = XMLGenerator(out)
+ gen.startDocument()
+ gen.startElement("doc", {})
+ func(result)
+ self.assertFalse(result.closed)
+
+ def test_xmlgen_fragment(self):
+ result = self.ioclass()
+ gen = XMLGenerator(result)
+
+ # Don't call gen.startDocument()
+ gen.startElement("foo", {"a": "1.0"})
+ gen.characters("Hello")
+ gen.endElement("foo")
+ gen.startElement("bar", {"b": "2.0"})
+ gen.endElement("bar")
+ # Don't call gen.endDocument()
+
+ self.assertEqual(result.getvalue(),
+ self.xml('Hello')[len(self.xml('')):])
+
+class StringXmlgenTest(XmlgenTest, unittest.TestCase):
+ ioclass = StringIO
+
+ def xml(self, doc, encoding='iso-8859-1'):
+ return '\n%s' % (encoding, doc)
+
+ test_xmlgen_unencodable = None
+
+class BytesXmlgenTest(XmlgenTest, unittest.TestCase):
+ ioclass = BytesIO
+
+ def xml(self, doc, encoding='iso-8859-1'):
+ return ('\n%s' %
+ (encoding, doc)).encode(encoding, 'xmlcharrefreplace')
+
+class WriterXmlgenTest(BytesXmlgenTest):
+ class ioclass(list):
+ write = list.append
+ closed = False
+
+ def seekable(self):
+ return True
+
+ def tell(self):
+ # return 0 at start and not 0 after start
+ return len(self)
+
+ def getvalue(self):
+ return b''.join(self)
+
+class StreamWriterXmlgenTest(XmlgenTest, unittest.TestCase):
+ def ioclass(self):
+ raw = BytesIO()
+ writer = codecs.getwriter('ascii')(raw, 'xmlcharrefreplace')
+ writer.getvalue = raw.getvalue
+ return writer
+
+ def xml(self, doc, encoding='iso-8859-1'):
+ return ('\n%s' %
+ (encoding, doc)).encode('ascii', 'xmlcharrefreplace')
+
+class StreamReaderWriterXmlgenTest(XmlgenTest, unittest.TestCase):
+ fname = os_helper.TESTFN + '-codecs'
+
+ def ioclass(self):
+ with self.assertWarns(DeprecationWarning):
+ writer = codecs.open(self.fname, 'w', encoding='ascii',
+ errors='xmlcharrefreplace', buffering=0)
+ def cleanup():
+ writer.close()
+ os_helper.unlink(self.fname)
+ self.addCleanup(cleanup)
+ def getvalue():
+ # Windows will not let use reopen without first closing
+ writer.close()
+ with open(writer.name, 'rb') as f:
+ return f.read()
+ writer.getvalue = getvalue
+ return writer
+
+ def xml(self, doc, encoding='iso-8859-1'):
+ return ('\n%s' %
+ (encoding, doc)).encode('ascii', 'xmlcharrefreplace')
+
+start = b'\n'
+
+
+class XMLFilterBaseTest(unittest.TestCase):
+ def test_filter_basic(self):
+ result = BytesIO()
+ gen = XMLGenerator(result)
+ filter = XMLFilterBase()
+ filter.setContentHandler(gen)
+
+ filter.startDocument()
+ filter.startElement("doc", {})
+ filter.characters("content")
+ filter.ignorableWhitespace(" ")
+ filter.endElement("doc")
+ filter.endDocument()
+
+ self.assertEqual(result.getvalue(), start + b"content ")
+
+# ===========================================================================
+#
+# expatreader tests
+#
+# ===========================================================================
+
+with open(TEST_XMLFILE_OUT, 'rb') as f:
+ xml_test_out = f.read()
+
+class ExpatReaderTest(XmlTestBase):
+
+ # ===== XMLReader support
+
+ def test_expat_binary_file(self):
+ parser = create_parser()
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ with open(TEST_XMLFILE, 'rb') as f:
+ parser.parse(f)
+
+ self.assertEqual(result.getvalue(), xml_test_out)
+
+ def test_expat_text_file(self):
+ parser = create_parser()
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
+ parser.parse(f)
+
+ self.assertEqual(result.getvalue(), xml_test_out)
+
+ @requires_nonascii_filenames
+ def test_expat_binary_file_nonascii(self):
+ fname = os_helper.TESTFN_UNICODE
+ shutil.copyfile(TEST_XMLFILE, fname)
+ self.addCleanup(os_helper.unlink, fname)
+
+ parser = create_parser()
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ parser.parse(open(fname, 'rb'))
+
+ self.assertEqual(result.getvalue(), xml_test_out)
+
+ def test_expat_binary_file_bytes_name(self):
+ fname = os.fsencode(TEST_XMLFILE)
+ parser = create_parser()
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ with open(fname, 'rb') as f:
+ parser.parse(f)
+
+ self.assertEqual(result.getvalue(), xml_test_out)
+
+ def test_expat_binary_file_int_name(self):
+ parser = create_parser()
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ with open(TEST_XMLFILE, 'rb') as f:
+ with open(f.fileno(), 'rb', closefd=False) as f2:
+ parser.parse(f2)
+
+ self.assertEqual(result.getvalue(), xml_test_out)
+
+ # ===== DTDHandler support
+
+ class TestDTDHandler:
+
+ def __init__(self):
+ self._notations = []
+ self._entities = []
+
+ def notationDecl(self, name, publicId, systemId):
+ self._notations.append((name, publicId, systemId))
+
+ def unparsedEntityDecl(self, name, publicId, systemId, ndata):
+ self._entities.append((name, publicId, systemId, ndata))
+
+
+ class TestEntityRecorder:
+ def __init__(self):
+ self.entities = []
+
+ def resolveEntity(self, publicId, systemId):
+ self.entities.append((publicId, systemId))
+ source = InputSource()
+ source.setPublicId(publicId)
+ source.setSystemId(systemId)
+ return source
+
+ def test_expat_dtdhandler(self):
+ parser = create_parser()
+ handler = self.TestDTDHandler()
+ parser.setDTDHandler(handler)
+
+ parser.feed('\n')
+ parser.feed(' \n')
+ parser.feed(']>\n')
+ parser.feed('')
+ parser.close()
+
+ self.assertEqual(handler._notations,
+ [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)])
+ self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")])
+
+ def test_expat_external_dtd_enabled(self):
+ # clear _opener global variable
+ self.addCleanup(urllib.request.urlcleanup)
+
+ parser = create_parser()
+ parser.setFeature(feature_external_ges, True)
+ resolver = self.TestEntityRecorder()
+ parser.setEntityResolver(resolver)
+
+ with self.assertRaises(URLError):
+ parser.feed(
+ '\n'
+ )
+ self.assertEqual(
+ resolver.entities, [(None, 'unsupported://non-existing')]
+ )
+
+ def test_expat_external_dtd_default(self):
+ parser = create_parser()
+ resolver = self.TestEntityRecorder()
+ parser.setEntityResolver(resolver)
+
+ parser.feed(
+ '\n'
+ )
+ parser.feed('')
+ parser.close()
+ self.assertEqual(resolver.entities, [])
+
+ # ===== EntityResolver support
+
+ class TestEntityResolver:
+
+ def resolveEntity(self, publicId, systemId):
+ inpsrc = InputSource()
+ inpsrc.setByteStream(BytesIO(b""))
+ return inpsrc
+
+ def test_expat_entityresolver_enabled(self):
+ parser = create_parser()
+ parser.setFeature(feature_external_ges, True)
+ parser.setEntityResolver(self.TestEntityResolver())
+ result = BytesIO()
+ parser.setContentHandler(XMLGenerator(result))
+
+ parser.feed('\n')
+ parser.feed(']>\n')
+ parser.feed('&test;')
+ parser.close()
+
+ self.assertEqual(result.getvalue(), start +
+ b"")
+
+ def test_expat_entityresolver_default(self):
+ parser = create_parser()
+ self.assertEqual(parser.getFeature(feature_external_ges), False)
+ parser.setEntityResolver(self.TestEntityResolver())
+ result = BytesIO()
+ parser.setContentHandler(XMLGenerator(result))
+
+ parser.feed('\n')
+ parser.feed(']>\n')
+ parser.feed('&test;')
+ parser.close()
+
+ self.assertEqual(result.getvalue(), start +
+ b"")
+
+ # ===== Attributes support
+
+ class AttrGatherer(ContentHandler):
+
+ def startElement(self, name, attrs):
+ self._attrs = attrs
+
+ def startElementNS(self, name, qname, attrs):
+ self._attrs = attrs
+
+ def test_expat_attrs_empty(self):
+ parser = create_parser()
+ gather = self.AttrGatherer()
+ parser.setContentHandler(gather)
+
+ parser.feed("")
+ parser.close()
+
+ self.verify_empty_attrs(gather._attrs)
+
+ def test_expat_attrs_wattr(self):
+ parser = create_parser()
+ gather = self.AttrGatherer()
+ parser.setContentHandler(gather)
+
+ parser.feed("")
+ parser.close()
+
+ self.verify_attrs_wattr(gather._attrs)
+
+ def test_expat_nsattrs_empty(self):
+ parser = create_parser(1)
+ gather = self.AttrGatherer()
+ parser.setContentHandler(gather)
+
+ parser.feed("")
+ parser.close()
+
+ self.verify_empty_nsattrs(gather._attrs)
+
+ def test_expat_nsattrs_wattr(self):
+ parser = create_parser(1)
+ gather = self.AttrGatherer()
+ parser.setContentHandler(gather)
+
+ parser.feed("" % ns_uri)
+ parser.close()
+
+ attrs = gather._attrs
+
+ self.assertEqual(attrs.getLength(), 1)
+ self.assertEqual(attrs.getNames(), [(ns_uri, "attr")])
+ self.assertTrue((attrs.getQNames() == [] or
+ attrs.getQNames() == ["ns:attr"]))
+ self.assertEqual(len(attrs), 1)
+ self.assertIn((ns_uri, "attr"), attrs)
+ self.assertEqual(attrs.get((ns_uri, "attr")), "val")
+ self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val")
+ self.assertEqual(list(attrs.items()), [((ns_uri, "attr"), "val")])
+ self.assertEqual(list(attrs.values()), ["val"])
+ self.assertEqual(attrs.getValue((ns_uri, "attr")), "val")
+ self.assertEqual(attrs[(ns_uri, "attr")], "val")
+
+ # ===== InputSource support
+
+ def test_expat_inpsource_filename(self):
+ parser = create_parser()
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ parser.parse(TEST_XMLFILE)
+
+ self.assertEqual(result.getvalue(), xml_test_out)
+
+ def test_expat_inpsource_sysid(self):
+ parser = create_parser()
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ parser.parse(InputSource(TEST_XMLFILE))
+
+ self.assertEqual(result.getvalue(), xml_test_out)
+
+ @requires_nonascii_filenames
+ def test_expat_inpsource_sysid_nonascii(self):
+ fname = os_helper.TESTFN_UNICODE
+ shutil.copyfile(TEST_XMLFILE, fname)
+ self.addCleanup(os_helper.unlink, fname)
+
+ parser = create_parser()
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ parser.parse(InputSource(fname))
+
+ self.assertEqual(result.getvalue(), xml_test_out)
+
+ def test_expat_inpsource_byte_stream(self):
+ parser = create_parser()
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ inpsrc = InputSource()
+ with open(TEST_XMLFILE, 'rb') as f:
+ inpsrc.setByteStream(f)
+ parser.parse(inpsrc)
+
+ self.assertEqual(result.getvalue(), xml_test_out)
+
+ def test_expat_inpsource_character_stream(self):
+ parser = create_parser()
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ inpsrc = InputSource()
+ with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
+ inpsrc.setCharacterStream(f)
+ parser.parse(inpsrc)
+
+ self.assertEqual(result.getvalue(), xml_test_out)
+
+ # ===== IncrementalParser support
+
+ def test_expat_incremental(self):
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+ parser = create_parser()
+ parser.setContentHandler(xmlgen)
+
+ parser.feed("")
+ parser.feed("")
+ parser.close()
+
+ self.assertEqual(result.getvalue(), start + b"")
+
+ def test_expat_incremental_reset(self):
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+ parser = create_parser()
+ parser.setContentHandler(xmlgen)
+
+ parser.feed("")
+ parser.feed("text")
+
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+ parser.setContentHandler(xmlgen)
+ parser.reset()
+
+ parser.feed("")
+ parser.feed("text")
+ parser.feed("")
+ parser.close()
+
+ self.assertEqual(result.getvalue(), start + b"text")
+
+ @unittest.skipIf(pyexpat.version_info < (2, 6, 0),
+ f'Expat {pyexpat.version_info} does not '
+ 'support reparse deferral')
+ def test_flush_reparse_deferral_enabled(self):
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+ parser = create_parser()
+ parser.setContentHandler(xmlgen)
+
+ for chunk in (""):
+ parser.feed(chunk)
+
+ self.assertEqual(result.getvalue(), start) # i.e. no elements started
+ self.assertTrue(parser._parser.GetReparseDeferralEnabled())
+
+ parser.flush()
+
+ self.assertTrue(parser._parser.GetReparseDeferralEnabled())
+ self.assertEqual(result.getvalue(), start + b"")
+
+ parser.feed("")
+ parser.close()
+
+ self.assertEqual(result.getvalue(), start + b"")
+
+ def test_flush_reparse_deferral_disabled(self):
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+ parser = create_parser()
+ parser.setContentHandler(xmlgen)
+
+ for chunk in (""):
+ parser.feed(chunk)
+
+ if pyexpat.version_info >= (2, 6, 0):
+ parser._parser.SetReparseDeferralEnabled(False)
+ self.assertEqual(result.getvalue(), start) # i.e. no elements started
+
+ self.assertFalse(parser._parser.GetReparseDeferralEnabled())
+
+ parser.flush()
+
+ self.assertFalse(parser._parser.GetReparseDeferralEnabled())
+ self.assertEqual(result.getvalue(), start + b"")
+
+ parser.feed("")
+ parser.close()
+
+ self.assertEqual(result.getvalue(), start + b"")
+
+ # ===== Locator support
+
+ def test_expat_locator_noinfo(self):
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+ parser = create_parser()
+ parser.setContentHandler(xmlgen)
+
+ parser.feed("")
+ parser.feed("")
+ parser.close()
+
+ self.assertEqual(parser.getSystemId(), None)
+ self.assertEqual(parser.getPublicId(), None)
+ self.assertEqual(parser.getLineNumber(), 1)
+
+ def test_expat_locator_withinfo(self):
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+ parser = create_parser()
+ parser.setContentHandler(xmlgen)
+ parser.parse(TEST_XMLFILE)
+
+ self.assertEqual(parser.getSystemId(), TEST_XMLFILE)
+ self.assertEqual(parser.getPublicId(), None)
+
+ @requires_nonascii_filenames
+ def test_expat_locator_withinfo_nonascii(self):
+ fname = os_helper.TESTFN_UNICODE
+ shutil.copyfile(TEST_XMLFILE, fname)
+ self.addCleanup(os_helper.unlink, fname)
+
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+ parser = create_parser()
+ parser.setContentHandler(xmlgen)
+ parser.parse(fname)
+
+ self.assertEqual(parser.getSystemId(), fname)
+ self.assertEqual(parser.getPublicId(), None)
+
+
+# ===========================================================================
+#
+# error reporting
+#
+# ===========================================================================
+
+class ErrorReportingTest(unittest.TestCase):
+ def test_expat_inpsource_location(self):
+ parser = create_parser()
+ parser.setContentHandler(ContentHandler()) # do nothing
+ source = InputSource()
+ source.setByteStream(BytesIO(b"")) #ill-formed
+ name = "a file name"
+ source.setSystemId(name)
+ try:
+ parser.parse(source)
+ self.fail()
+ except SAXException as e:
+ self.assertEqual(e.getSystemId(), name)
+
+ def test_expat_incomplete(self):
+ parser = create_parser()
+ parser.setContentHandler(ContentHandler()) # do nothing
+ self.assertRaises(SAXParseException, parser.parse, StringIO(""))
+ self.assertEqual(parser.getColumnNumber(), 5)
+ self.assertEqual(parser.getLineNumber(), 1)
+
+ def test_sax_parse_exception_str(self):
+ # pass various values from a locator to the SAXParseException to
+ # make sure that the __str__() doesn't fall apart when None is
+ # passed instead of an integer line and column number
+ #
+ # use "normal" values for the locator:
+ str(SAXParseException("message", None,
+ self.DummyLocator(1, 1)))
+ # use None for the line number:
+ str(SAXParseException("message", None,
+ self.DummyLocator(None, 1)))
+ # use None for the column number:
+ str(SAXParseException("message", None,
+ self.DummyLocator(1, None)))
+ # use None for both:
+ str(SAXParseException("message", None,
+ self.DummyLocator(None, None)))
+
+ class DummyLocator:
+ def __init__(self, lineno, colno):
+ self._lineno = lineno
+ self._colno = colno
+
+ def getPublicId(self):
+ return "pubid"
+
+ def getSystemId(self):
+ return "sysid"
+
+ def getLineNumber(self):
+ return self._lineno
+
+ def getColumnNumber(self):
+ return self._colno
+
+# ===========================================================================
+#
+# xmlreader tests
+#
+# ===========================================================================
+
+class XmlReaderTest(XmlTestBase):
+
+ # ===== AttributesImpl
+ def test_attrs_empty(self):
+ self.verify_empty_attrs(AttributesImpl({}))
+
+ def test_attrs_wattr(self):
+ self.verify_attrs_wattr(AttributesImpl({"attr" : "val"}))
+
+ def test_nsattrs_empty(self):
+ self.verify_empty_nsattrs(AttributesNSImpl({}, {}))
+
+ def test_nsattrs_wattr(self):
+ attrs = AttributesNSImpl({(ns_uri, "attr") : "val"},
+ {(ns_uri, "attr") : "ns:attr"})
+
+ self.assertEqual(attrs.getLength(), 1)
+ self.assertEqual(attrs.getNames(), [(ns_uri, "attr")])
+ self.assertEqual(attrs.getQNames(), ["ns:attr"])
+ self.assertEqual(len(attrs), 1)
+ self.assertIn((ns_uri, "attr"), attrs)
+ self.assertEqual(list(attrs.keys()), [(ns_uri, "attr")])
+ self.assertEqual(attrs.get((ns_uri, "attr")), "val")
+ self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val")
+ self.assertEqual(list(attrs.items()), [((ns_uri, "attr"), "val")])
+ self.assertEqual(list(attrs.values()), ["val"])
+ self.assertEqual(attrs.getValue((ns_uri, "attr")), "val")
+ self.assertEqual(attrs.getValueByQName("ns:attr"), "val")
+ self.assertEqual(attrs.getNameByQName("ns:attr"), (ns_uri, "attr"))
+ self.assertEqual(attrs[(ns_uri, "attr")], "val")
+ self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr")
+
+
+class LexicalHandlerTest(unittest.TestCase):
+ def setUp(self):
+ self.parser = None
+
+ self.specified_version = '1.0'
+ self.specified_encoding = 'UTF-8'
+ self.specified_doctype = 'wish'
+ self.specified_entity_names = ('nbsp', 'source', 'target')
+ self.specified_comment = ('Comment in a DTD',
+ 'Really! You think so?')
+ self.test_data = StringIO()
+ self.test_data.write('\n'.
+ format(self.specified_version,
+ self.specified_encoding))
+ self.test_data.write('\n'.
+ format(self.specified_comment[0]))
+ self.test_data.write('\n'.
+ format(self.specified_doctype))
+ self.test_data.write('\n')
+ self.test_data.write('\n')
+ self.test_data.write('\n')
+ self.test_data.write('\n')
+ self.test_data.write('\n')
+ self.test_data.write('\n'.
+ format(self.specified_entity_names[0]))
+ self.test_data.write('\n'.
+ format(self.specified_entity_names[1]))
+ self.test_data.write('\n'.
+ format(self.specified_entity_names[2]))
+ self.test_data.write(']>\n')
+ self.test_data.write('<{}>'.format(self.specified_doctype))
+ self.test_data.write('Aristotle\n')
+ self.test_data.write('Alexander\n')
+ self.test_data.write('Supplication\n')
+ self.test_data.write('Teach me patience!\n')
+ self.test_data.write('\n'.
+ format(self.specified_entity_names[1],
+ self.specified_entity_names[0],
+ self.specified_entity_names[2]))
+ self.test_data.write('\n'.format(self.specified_comment[1]))
+ self.test_data.write('{}>\n'.format(self.specified_doctype))
+ self.test_data.seek(0)
+
+ # Data received from handlers - to be validated
+ self.version = None
+ self.encoding = None
+ self.standalone = None
+ self.doctype = None
+ self.publicID = None
+ self.systemID = None
+ self.end_of_dtd = False
+ self.comments = []
+
+ def test_handlers(self):
+ class TestLexicalHandler(LexicalHandler):
+ def __init__(self, test_harness, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.test_harness = test_harness
+
+ def startDTD(self, doctype, publicID, systemID):
+ self.test_harness.doctype = doctype
+ self.test_harness.publicID = publicID
+ self.test_harness.systemID = systemID
+
+ def endDTD(self):
+ self.test_harness.end_of_dtd = True
+
+ def comment(self, text):
+ self.test_harness.comments.append(text)
+
+ self.parser = create_parser()
+ self.parser.setContentHandler(ContentHandler())
+ self.parser.setProperty(
+ 'http://xml.org/sax/properties/lexical-handler',
+ TestLexicalHandler(self))
+ source = InputSource()
+ source.setCharacterStream(self.test_data)
+ self.parser.parse(source)
+ self.assertEqual(self.doctype, self.specified_doctype)
+ self.assertIsNone(self.publicID)
+ self.assertIsNone(self.systemID)
+ self.assertTrue(self.end_of_dtd)
+ self.assertEqual(len(self.comments),
+ len(self.specified_comment))
+ self.assertEqual(f' {self.specified_comment[0]} ', self.comments[0])
+
+
+class CDATAHandlerTest(unittest.TestCase):
+ def setUp(self):
+ self.parser = None
+ self.specified_chars = []
+ self.specified_chars.append(('Parseable character data', False))
+ self.specified_chars.append(('<> &% - assorted other XML junk.', True))
+ self.char_index = 0 # Used to index specified results within handlers
+ self.test_data = StringIO()
+ self.test_data.write('\n')
+ self.test_data.write('\n')
+ self.test_data.write(f'{self.specified_chars[0][0]}\n')
+ self.test_data.write('\n')
+ self.test_data.write('\n')
+ self.test_data.write(f'\n')
+ self.test_data.write('\n')
+ self.test_data.write('\n')
+ self.test_data.seek(0)
+
+ # Data received from handlers - to be validated
+ self.chardata = []
+ self.in_cdata = False
+
+ def test_handlers(self):
+ class TestLexicalHandler(LexicalHandler):
+ def __init__(self, test_harness, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.test_harness = test_harness
+
+ def startCDATA(self):
+ self.test_harness.in_cdata = True
+
+ def endCDATA(self):
+ self.test_harness.in_cdata = False
+
+ class TestCharHandler(ContentHandler):
+ def __init__(self, test_harness, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.test_harness = test_harness
+
+ def characters(self, content):
+ if content != '\n':
+ h = self.test_harness
+ t = h.specified_chars[h.char_index]
+ h.assertEqual(t[0], content)
+ h.assertEqual(t[1], h.in_cdata)
+ h.char_index += 1
+
+ self.parser = create_parser()
+ self.parser.setContentHandler(TestCharHandler(self))
+ self.parser.setProperty(
+ 'http://xml.org/sax/properties/lexical-handler',
+ TestLexicalHandler(self))
+ source = InputSource()
+ source.setCharacterStream(self.test_data)
+ self.parser.parse(source)
+
+ self.assertFalse(self.in_cdata)
+ self.assertEqual(self.char_index, 2)
+
+
+class TestModuleAll(unittest.TestCase):
+ def test_all(self):
+ extra = (
+ 'ContentHandler',
+ 'ErrorHandler',
+ 'InputSource',
+ 'SAXException',
+ 'SAXNotRecognizedException',
+ 'SAXNotSupportedException',
+ 'SAXParseException',
+ 'SAXReaderNotAvailable',
+ )
+ check__all__(self, sax, extra=extra)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 9d6d39307..543583fbd 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -155,9 +155,9 @@ class ModuleTest(unittest.TestCase):
def test_sanity(self):
# Import sanity.
- from xml.etree import ElementTree
- from xml.etree import ElementInclude
- from xml.etree import ElementPath
+ from xml.etree import ElementTree # noqa: F401
+ from xml.etree import ElementInclude # noqa: F401
+ from xml.etree import ElementPath # noqa: F401
def test_all(self):
names = ("xml.etree.ElementTree", "_elementtree")
@@ -252,8 +252,7 @@ class ElementTreeTest(unittest.TestCase):
self.assertTrue(ET.iselement(element), msg="not an element")
direlem = dir(element)
for attr in 'tag', 'attrib', 'text', 'tail':
- self.assertTrue(hasattr(element, attr),
- msg='no %s member' % attr)
+ self.assertHasAttr(element, attr)
self.assertIn(attr, direlem,
msg='no %s visible by dir' % attr)
@@ -278,7 +277,7 @@ class ElementTreeTest(unittest.TestCase):
# Make sure all standard element methods exist.
def check_method(method):
- self.assertTrue(hasattr(method, '__call__'),
+ self.assertHasAttr(method, '__call__',
msg="%s not callable" % method)
check_method(element.append)
@@ -339,7 +338,7 @@ class ElementTreeTest(unittest.TestCase):
element.attrib = {'A': 'B', 'C': 'D'}
self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'})
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_simpleops(self):
# Basic method sanity checks.
@@ -372,9 +371,9 @@ class ElementTreeTest(unittest.TestCase):
self.serialize_check(element, '') # 4
element.remove(subelement)
self.serialize_check(element, '') # 5
- with self.assertRaises(ValueError) as cm:
+ with self.assertRaisesRegex(ValueError,
+ r'Element\.remove\(.+\): element not found'):
element.remove(subelement)
- self.assertEqual(str(cm.exception), 'list.remove(x): x not in list')
self.serialize_check(element, '') # 6
element[0:0] = [subelement, subelement, subelement]
self.serialize_check(element[1], '')
@@ -394,7 +393,7 @@ class ElementTreeTest(unittest.TestCase):
self.serialize_check(ET.XML(""),
'hello')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_file_init(self):
stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8"))
tree = ET.ElementTree(file=stringfile)
@@ -510,7 +509,7 @@ class ElementTreeTest(unittest.TestCase):
elem[:] = tuple([subelem])
self.serialize_check(elem, '')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_parsefile(self):
# Test parsing from file.
@@ -556,7 +555,7 @@ class ElementTreeTest(unittest.TestCase):
' \n'
'')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_parseliteral(self):
element = ET.XML("text")
self.assertEqual(ET.tostring(element, encoding='unicode'),
@@ -579,210 +578,6 @@ class ElementTreeTest(unittest.TestCase):
self.assertEqual(len(ids), 1)
self.assertEqual(ids["body"].tag, 'body')
- @unittest.expectedFailure # TODO: RUSTPYTHON
- def test_iterparse(self):
- # Test iterparse interface.
-
- iterparse = ET.iterparse
-
- context = iterparse(SIMPLE_XMLFILE)
- self.assertIsNone(context.root)
- action, elem = next(context)
- self.assertIsNone(context.root)
- self.assertEqual((action, elem.tag), ('end', 'element'))
- self.assertEqual([(action, elem.tag) for action, elem in context], [
- ('end', 'element'),
- ('end', 'empty-element'),
- ('end', 'root'),
- ])
- self.assertEqual(context.root.tag, 'root')
-
- context = iterparse(SIMPLE_NS_XMLFILE)
- self.assertEqual([(action, elem.tag) for action, elem in context], [
- ('end', '{namespace}element'),
- ('end', '{namespace}element'),
- ('end', '{namespace}empty-element'),
- ('end', '{namespace}root'),
- ])
-
- with open(SIMPLE_XMLFILE, 'rb') as source:
- context = iterparse(source)
- action, elem = next(context)
- self.assertEqual((action, elem.tag), ('end', 'element'))
- self.assertEqual([(action, elem.tag) for action, elem in context], [
- ('end', 'element'),
- ('end', 'empty-element'),
- ('end', 'root'),
- ])
- self.assertEqual(context.root.tag, 'root')
-
- events = ()
- context = iterparse(SIMPLE_XMLFILE, events)
- self.assertEqual([(action, elem.tag) for action, elem in context], [])
-
- events = ()
- context = iterparse(SIMPLE_XMLFILE, events=events)
- self.assertEqual([(action, elem.tag) for action, elem in context], [])
-
- events = ("start", "end")
- context = iterparse(SIMPLE_XMLFILE, events)
- self.assertEqual([(action, elem.tag) for action, elem in context], [
- ('start', 'root'),
- ('start', 'element'),
- ('end', 'element'),
- ('start', 'element'),
- ('end', 'element'),
- ('start', 'empty-element'),
- ('end', 'empty-element'),
- ('end', 'root'),
- ])
-
- events = ("start", "end", "start-ns", "end-ns")
- context = iterparse(SIMPLE_NS_XMLFILE, events)
- self.assertEqual([(action, elem.tag) if action in ("start", "end")
- else (action, elem)
- for action, elem in context], [
- ('start-ns', ('', 'namespace')),
- ('start', '{namespace}root'),
- ('start', '{namespace}element'),
- ('end', '{namespace}element'),
- ('start', '{namespace}element'),
- ('end', '{namespace}element'),
- ('start', '{namespace}empty-element'),
- ('end', '{namespace}empty-element'),
- ('end', '{namespace}root'),
- ('end-ns', None),
- ])
-
- events = ('start-ns', 'end-ns')
- context = iterparse(io.StringIO(r""), events)
- res = [action for action, elem in context]
- self.assertEqual(res, ['start-ns', 'end-ns'])
-
- events = ("start", "end", "bogus")
- with open(SIMPLE_XMLFILE, "rb") as f:
- with self.assertRaises(ValueError) as cm:
- iterparse(f, events)
- self.assertFalse(f.closed)
- self.assertEqual(str(cm.exception), "unknown event 'bogus'")
-
- with warnings_helper.check_no_resource_warning(self):
- with self.assertRaises(ValueError) as cm:
- iterparse(SIMPLE_XMLFILE, events)
- self.assertEqual(str(cm.exception), "unknown event 'bogus'")
- del cm
-
- source = io.BytesIO(
- b"\n"
- b"text\n")
- events = ("start-ns",)
- context = iterparse(source, events)
- self.assertEqual([(action, elem) for action, elem in context], [
- ('start-ns', ('', 'http://\xe9ffbot.org/ns')),
- ('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
- ])
-
- source = io.StringIO("junk")
- it = iterparse(source)
- action, elem = next(it)
- self.assertEqual((action, elem.tag), ('end', 'document'))
- with self.assertRaises(ET.ParseError) as cm:
- next(it)
- self.assertEqual(str(cm.exception),
- 'junk after document element: line 1, column 12')
-
- self.addCleanup(os_helper.unlink, TESTFN)
- with open(TESTFN, "wb") as f:
- f.write(b"junk")
- it = iterparse(TESTFN)
- action, elem = next(it)
- self.assertEqual((action, elem.tag), ('end', 'document'))
- with warnings_helper.check_no_resource_warning(self):
- with self.assertRaises(ET.ParseError) as cm:
- next(it)
- self.assertEqual(str(cm.exception),
- 'junk after document element: line 1, column 12')
- del cm, it
-
- # Not exhausting the iterator still closes the resource (bpo-43292)
- with warnings_helper.check_no_resource_warning(self):
- it = iterparse(SIMPLE_XMLFILE)
- del it
-
- with warnings_helper.check_no_resource_warning(self):
- it = iterparse(SIMPLE_XMLFILE)
- it.close()
- del it
-
- with warnings_helper.check_no_resource_warning(self):
- it = iterparse(SIMPLE_XMLFILE)
- action, elem = next(it)
- self.assertEqual((action, elem.tag), ('end', 'element'))
- del it, elem
-
- with warnings_helper.check_no_resource_warning(self):
- it = iterparse(SIMPLE_XMLFILE)
- action, elem = next(it)
- it.close()
- self.assertEqual((action, elem.tag), ('end', 'element'))
- del it, elem
-
- with self.assertRaises(FileNotFoundError):
- iterparse("nonexistent")
-
- @unittest.expectedFailure # TODO: RUSTPYTHON
- def test_iterparse_close(self):
- iterparse = ET.iterparse
-
- it = iterparse(SIMPLE_XMLFILE)
- it.close()
- with self.assertRaises(StopIteration):
- next(it)
- it.close() # idempotent
-
- with open(SIMPLE_XMLFILE, 'rb') as source:
- it = iterparse(source)
- it.close()
- self.assertFalse(source.closed)
- with self.assertRaises(StopIteration):
- next(it)
- it.close() # idempotent
-
- it = iterparse(SIMPLE_XMLFILE)
- action, elem = next(it)
- self.assertEqual((action, elem.tag), ('end', 'element'))
- it.close()
- with self.assertRaises(StopIteration):
- next(it)
- it.close() # idempotent
-
- with open(SIMPLE_XMLFILE, 'rb') as source:
- it = iterparse(source)
- action, elem = next(it)
- self.assertEqual((action, elem.tag), ('end', 'element'))
- it.close()
- self.assertFalse(source.closed)
- with self.assertRaises(StopIteration):
- next(it)
- it.close() # idempotent
-
- it = iterparse(SIMPLE_XMLFILE)
- list(it)
- it.close()
- with self.assertRaises(StopIteration):
- next(it)
- it.close() # idempotent
-
- with open(SIMPLE_XMLFILE, 'rb') as source:
- it = iterparse(source)
- list(it)
- it.close()
- self.assertFalse(source.closed)
- with self.assertRaises(StopIteration):
- next(it)
- it.close() # idempotent
-
def test_writefile(self):
elem = ET.Element("tag")
elem.text = "text"
@@ -800,7 +595,7 @@ class ElementTreeTest(unittest.TestCase):
elem[0] = ET.PI("key", "value")
self.serialize_check(elem, 'textsubtext')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_custom_builder(self):
# Test parser w. custom builder.
@@ -862,7 +657,7 @@ class ElementTreeTest(unittest.TestCase):
('end-ns', ''),
])
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_custom_builder_only_end_ns(self):
class Builder(list):
def end_ns(self, prefix):
@@ -894,7 +689,7 @@ class ElementTreeTest(unittest.TestCase):
parser2 = ET.XMLParser()
self.assertIsInstance(parser2.target, ET.TreeBuilder)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_children(self):
# Test Element children iteration
@@ -1174,7 +969,7 @@ class ElementTreeTest(unittest.TestCase):
self.assertRegex(stringlist[0], r"^<\?xml version='1.0' encoding='.+'?>")
self.assertEqual(['', '', ''], stringlist[1:])
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_encoding(self):
def check(encoding, body=''):
xml = ("%s" %
@@ -1256,7 +1051,7 @@ class ElementTreeTest(unittest.TestCase):
self.assertEqual(serialize(e, method="html"),
'text')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_entity(self):
# Test entity handling.
@@ -1294,7 +1089,7 @@ class ElementTreeTest(unittest.TestCase):
self.assertEqual(str(cm.exception),
'undefined entity &entity;: line 4, column 10')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_namespace(self):
# Test namespace issues.
@@ -1505,13 +1300,249 @@ class ElementTreeTest(unittest.TestCase):
self.assertEqual(serialize(root, method='html'),
'')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_attlist_default(self):
# Test default attribute values; See BPO 42151.
root = ET.fromstring(ATTLIST_XML)
self.assertEqual(root[0].attrib,
{'{http://www.w3.org/XML/1998/namespace}lang': 'eng'})
+ @unittest.expectedFailure # TODO: RUSTPYTHON
+ def test_iterparse(self):
+ return super().test_iterparse()
+
+ @unittest.expectedFailure # TODO: RUSTPYTHON
+ def test_iterparse_close(self):
+ return super().test_iterparse_close()
+
+
+class IterparseTest(unittest.TestCase):
+ # Test iterparse interface.
+
+ def test_basic(self):
+ iterparse = ET.iterparse
+
+ it = iterparse(SIMPLE_XMLFILE)
+ self.assertIsNone(it.root)
+ action, elem = next(it)
+ self.assertIsNone(it.root)
+ self.assertEqual((action, elem.tag), ('end', 'element'))
+ self.assertEqual([(action, elem.tag) for action, elem in it], [
+ ('end', 'element'),
+ ('end', 'empty-element'),
+ ('end', 'root'),
+ ])
+ self.assertEqual(it.root.tag, 'root')
+ it.close()
+
+ it = iterparse(SIMPLE_NS_XMLFILE)
+ self.assertEqual([(action, elem.tag) for action, elem in it], [
+ ('end', '{namespace}element'),
+ ('end', '{namespace}element'),
+ ('end', '{namespace}empty-element'),
+ ('end', '{namespace}root'),
+ ])
+ it.close()
+
+ def test_external_file(self):
+ with open(SIMPLE_XMLFILE, 'rb') as source:
+ it = ET.iterparse(source)
+ action, elem = next(it)
+ self.assertEqual((action, elem.tag), ('end', 'element'))
+ self.assertEqual([(action, elem.tag) for action, elem in it], [
+ ('end', 'element'),
+ ('end', 'empty-element'),
+ ('end', 'root'),
+ ])
+ self.assertEqual(it.root.tag, 'root')
+
+ def test_events(self):
+ iterparse = ET.iterparse
+
+ events = ()
+ it = iterparse(SIMPLE_XMLFILE, events)
+ self.assertEqual([(action, elem.tag) for action, elem in it], [])
+ it.close()
+
+ events = ()
+ it = iterparse(SIMPLE_XMLFILE, events=events)
+ self.assertEqual([(action, elem.tag) for action, elem in it], [])
+ it.close()
+
+ events = ("start", "end")
+ it = iterparse(SIMPLE_XMLFILE, events)
+ self.assertEqual([(action, elem.tag) for action, elem in it], [
+ ('start', 'root'),
+ ('start', 'element'),
+ ('end', 'element'),
+ ('start', 'element'),
+ ('end', 'element'),
+ ('start', 'empty-element'),
+ ('end', 'empty-element'),
+ ('end', 'root'),
+ ])
+ it.close()
+
+ def test_namespace_events(self):
+ iterparse = ET.iterparse
+
+ events = ("start", "end", "start-ns", "end-ns")
+ it = iterparse(SIMPLE_NS_XMLFILE, events)
+ self.assertEqual([(action, elem.tag) if action in ("start", "end")
+ else (action, elem)
+ for action, elem in it], [
+ ('start-ns', ('', 'namespace')),
+ ('start', '{namespace}root'),
+ ('start', '{namespace}element'),
+ ('end', '{namespace}element'),
+ ('start', '{namespace}element'),
+ ('end', '{namespace}element'),
+ ('start', '{namespace}empty-element'),
+ ('end', '{namespace}empty-element'),
+ ('end', '{namespace}root'),
+ ('end-ns', None),
+ ])
+ it.close()
+
+ events = ('start-ns', 'end-ns')
+ it = iterparse(io.BytesIO(br""), events)
+ res = [action for action, elem in it]
+ self.assertEqual(res, ['start-ns', 'end-ns'])
+ it.close()
+
+ def test_unknown_events(self):
+ iterparse = ET.iterparse
+
+ events = ("start", "end", "bogus")
+ with open(SIMPLE_XMLFILE, "rb") as f:
+ with self.assertRaises(ValueError) as cm:
+ iterparse(f, events)
+ self.assertFalse(f.closed)
+ self.assertEqual(str(cm.exception), "unknown event 'bogus'")
+
+ with warnings_helper.check_no_resource_warning(self):
+ with self.assertRaises(ValueError) as cm:
+ iterparse(SIMPLE_XMLFILE, events)
+ self.assertEqual(str(cm.exception), "unknown event 'bogus'")
+ del cm
+ gc_collect()
+
+ def test_non_utf8(self):
+ source = io.BytesIO(
+ b"\n"
+ b"text\n")
+ events = ("start-ns",)
+ it = ET.iterparse(source, events)
+ self.assertEqual([(action, elem) for action, elem in it], [
+ ('start-ns', ('', 'http://\xe9ffbot.org/ns')),
+ ('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
+ ])
+
+ def test_parsing_error(self):
+ source = io.BytesIO(b"junk")
+ it = ET.iterparse(source)
+ action, elem = next(it)
+ self.assertEqual((action, elem.tag), ('end', 'document'))
+ with self.assertRaises(ET.ParseError) as cm:
+ next(it)
+ self.assertEqual(str(cm.exception),
+ 'junk after document element: line 1, column 12')
+
+ def test_nonexistent_file(self):
+ with self.assertRaises(FileNotFoundError):
+ ET.iterparse("nonexistent")
+
+ def test_resource_warnings_not_exhausted(self):
+ # Not exhausting the iterator still closes the underlying file (bpo-43292)
+ it = ET.iterparse(SIMPLE_XMLFILE)
+ with warnings_helper.check_no_resource_warning(self):
+ del it
+ gc_collect()
+
+ it = ET.iterparse(SIMPLE_XMLFILE)
+ with warnings_helper.check_no_resource_warning(self):
+ action, elem = next(it)
+ self.assertEqual((action, elem.tag), ('end', 'element'))
+ del it, elem
+ gc_collect()
+
+ def test_resource_warnings_failed_iteration(self):
+ self.addCleanup(os_helper.unlink, TESTFN)
+ with open(TESTFN, "wb") as f:
+ f.write(b"junk")
+
+ it = ET.iterparse(TESTFN)
+ action, elem = next(it)
+ self.assertEqual((action, elem.tag), ('end', 'document'))
+ with warnings_helper.check_no_resource_warning(self):
+ with self.assertRaises(ET.ParseError) as cm:
+ next(it)
+ self.assertEqual(str(cm.exception),
+ 'junk after document element: line 1, column 12')
+ del cm, it
+ gc_collect()
+
+ def test_resource_warnings_exhausted(self):
+ it = ET.iterparse(SIMPLE_XMLFILE)
+ with warnings_helper.check_no_resource_warning(self):
+ list(it)
+ del it
+ gc_collect()
+
+ def test_close_not_exhausted(self):
+ iterparse = ET.iterparse
+
+ it = iterparse(SIMPLE_XMLFILE)
+ it.close()
+ with self.assertRaises(StopIteration):
+ next(it)
+ it.close() # idempotent
+
+ with open(SIMPLE_XMLFILE, 'rb') as source:
+ it = iterparse(source)
+ it.close()
+ self.assertFalse(source.closed)
+ with self.assertRaises(StopIteration):
+ next(it)
+ it.close() # idempotent
+
+ it = iterparse(SIMPLE_XMLFILE)
+ action, elem = next(it)
+ self.assertEqual((action, elem.tag), ('end', 'element'))
+ it.close()
+ with self.assertRaises(StopIteration):
+ next(it)
+ it.close() # idempotent
+
+ with open(SIMPLE_XMLFILE, 'rb') as source:
+ it = iterparse(source)
+ action, elem = next(it)
+ self.assertEqual((action, elem.tag), ('end', 'element'))
+ it.close()
+ self.assertFalse(source.closed)
+ with self.assertRaises(StopIteration):
+ next(it)
+ it.close() # idempotent
+
+ def test_close_exhausted(self):
+ iterparse = ET.iterparse
+ it = iterparse(SIMPLE_XMLFILE)
+ list(it)
+ it.close()
+ with self.assertRaises(StopIteration):
+ next(it)
+ it.close() # idempotent
+
+ with open(SIMPLE_XMLFILE, 'rb') as source:
+ it = iterparse(source)
+ list(it)
+ it.close()
+ self.assertFalse(source.closed)
+ with self.assertRaises(StopIteration):
+ next(it)
+ it.close() # idempotent
+
class XMLPullParserTest(unittest.TestCase):
@@ -1540,7 +1571,7 @@ class XMLPullParserTest(unittest.TestCase):
self.assertEqual([(action, elem.tag) for action, elem in events],
expected)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_simple_xml(self, chunk_size=None, flush=False):
parser = ET.XMLPullParser()
self.assert_event_tags(parser, [])
@@ -1562,19 +1593,19 @@ class XMLPullParserTest(unittest.TestCase):
self.assert_event_tags(parser, [('end', 'root')])
self.assertIsNone(parser.close())
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_simple_xml_chunk_1(self):
self.test_simple_xml(chunk_size=1, flush=True)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_simple_xml_chunk_5(self):
self.test_simple_xml(chunk_size=5, flush=True)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_simple_xml_chunk_22(self):
self.test_simple_xml(chunk_size=22)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_feed_while_iterating(self):
parser = ET.XMLPullParser()
it = parser.read_events()
@@ -1587,7 +1618,7 @@ class XMLPullParserTest(unittest.TestCase):
with self.assertRaises(StopIteration):
next(it)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_simple_xml_with_ns(self):
parser = ET.XMLPullParser()
self.assert_event_tags(parser, [])
@@ -1609,7 +1640,7 @@ class XMLPullParserTest(unittest.TestCase):
self.assert_event_tags(parser, [('end', '{namespace}root')])
self.assertIsNone(parser.close())
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_ns_events(self):
parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
self._feed(parser, "\n")
@@ -1625,7 +1656,7 @@ class XMLPullParserTest(unittest.TestCase):
self.assertEqual(list(parser.read_events()), [('end-ns', None)])
self.assertIsNone(parser.close())
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_ns_events_start(self):
parser = ET.XMLPullParser(events=('start-ns', 'start', 'end'))
self._feed(parser, "\n")
@@ -1649,7 +1680,7 @@ class XMLPullParserTest(unittest.TestCase):
('end', '{abc}tag'),
])
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_ns_events_start_end(self):
parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns'))
self._feed(parser, "\n")
@@ -1677,7 +1708,7 @@ class XMLPullParserTest(unittest.TestCase):
('end-ns', None),
])
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_events(self):
parser = ET.XMLPullParser(events=())
self._feed(parser, "\n")
@@ -1724,7 +1755,7 @@ class XMLPullParserTest(unittest.TestCase):
self._feed(parser, "")
self.assertIsNone(parser.close())
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_events_comment(self):
parser = ET.XMLPullParser(events=('start', 'comment', 'end'))
self._feed(parser, "\n")
@@ -1744,7 +1775,7 @@ class XMLPullParserTest(unittest.TestCase):
self._feed(parser, "\n")
self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_events_pi(self):
parser = ET.XMLPullParser(events=('start', 'pi', 'end'))
self._feed(parser, "\n")
@@ -1775,8 +1806,10 @@ class XMLPullParserTest(unittest.TestCase):
def test_unknown_event(self):
with self.assertRaises(ValueError):
ET.XMLPullParser(events=('start', 'end', 'bogus'))
+ with self.assertRaisesRegex(ValueError, "unknown event 'bogus'"):
+ ET.XMLPullParser(events=(x.decode() for x in (b'start', b'end', b'bogus')))
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
@unittest.skipIf(pyexpat.version_info < (2, 6, 0),
f'Expat {pyexpat.version_info} does not '
'support reparse deferral')
@@ -1801,7 +1834,7 @@ class XMLPullParserTest(unittest.TestCase):
self.assert_event_tags(parser, [('end', 'doc')])
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_flush_reparse_deferral_disabled(self):
parser = ET.XMLPullParser(events=('start', 'end'))
@@ -1984,7 +2017,7 @@ class XIncludeTest(unittest.TestCase):
else:
return None
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_xinclude_default(self):
from xml.etree import ElementInclude
doc = self.xinclude_loader('default.xml')
@@ -1999,7 +2032,7 @@ class XIncludeTest(unittest.TestCase):
'\n'
'')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_xinclude(self):
from xml.etree import ElementInclude
@@ -2064,7 +2097,7 @@ class XIncludeTest(unittest.TestCase):
' \n'
'') # C5
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_xinclude_repeated(self):
from xml.etree import ElementInclude
@@ -2072,7 +2105,7 @@ class XIncludeTest(unittest.TestCase):
ElementInclude.include(document, self.xinclude_loader)
self.assertEqual(1+4*2, len(document.findall(".//p")))
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_xinclude_failures(self):
from xml.etree import ElementInclude
@@ -2177,7 +2210,7 @@ class BugsTest(unittest.TestCase):
elem.set("123", 123)
check(elem) # attribute value
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_bug_xmltoolkit25(self):
# typo in ElementTree.findtext
@@ -2201,7 +2234,7 @@ class BugsTest(unittest.TestCase):
ET.dump(tree)
self.assertEqual(stdout.getvalue(), '\n')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_bug_xmltoolkit39(self):
# non-ascii element and attribute names doesn't work
@@ -2236,7 +2269,7 @@ class BugsTest(unittest.TestCase):
b'舰')
self.assertEqual(serialize(e), '\u8230')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_bug_xmltoolkit55(self):
# make sure we're reporting the first error, not the last
@@ -2255,7 +2288,7 @@ class BugsTest(unittest.TestCase):
self.assertRaises(OSError, ET.parse, ExceptionFile())
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_bug_xmltoolkit62(self):
# Don't crash when using custom entities.
@@ -2288,7 +2321,7 @@ class BugsTest(unittest.TestCase):
xmltoolkit63()
self.assertEqual(sys.getrefcount(None), count)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_bug_200708_newline(self):
# Preserve newlines in attributes.
@@ -2404,7 +2437,7 @@ class BugsTest(unittest.TestCase):
b"\n"
b'tãg')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_issue6565(self):
elem = ET.XML("")
self.assertEqual(summarize_list(elem), ['tag'])
@@ -2450,7 +2483,7 @@ class BugsTest(unittest.TestCase):
self.assertIsInstance(e[0].tail, str)
self.assertEqual(e[0].tail, 'changed')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_lost_elem(self):
# Issue #25902: Borrowed element can disappear
class Tag:
@@ -2476,7 +2509,7 @@ class BugsTest(unittest.TestCase):
root = ET.XML(xml)
self.assertEqual(root.get('b'), text.decode('utf-8'))
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_expat224_utf8_bug(self):
# bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.
# Check that Expat 2.2.4 fixed the bug.
@@ -2489,7 +2522,7 @@ class BugsTest(unittest.TestCase):
text = b'x' + b'\xc3\xa0' * 1024
self.check_expat224_utf8_bug(text)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_expat224_utf8_bug_file(self):
with open(UTF8_BUG_XMLFILE, 'rb') as fp:
raw = fp.read()
@@ -2639,7 +2672,7 @@ class BasicElementTest(ElementTestCase, unittest.TestCase):
e[:] = [E('bar')]
self.assertRaises(TypeError, copy.deepcopy, e)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_cyclic_gc(self):
class Dummy:
pass
@@ -2977,32 +3010,72 @@ class BadElementTest(ElementTestCase, unittest.TestCase):
elem = b.close()
self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL')
- def test_subscr(self):
- # Issue #27863
+ def test_subscr_with_clear(self):
+ # See https://github.com/python/cpython/issues/143200.
+ self.do_test_subscr_with_mutating_slice(use_clear_method=True)
+
+ def test_subscr_with_delete(self):
+ # See https://github.com/python/cpython/issues/72050.
+ self.do_test_subscr_with_mutating_slice(use_clear_method=False)
+
+ def do_test_subscr_with_mutating_slice(self, *, use_clear_method):
class X:
+ def __init__(self, i=0):
+ self.i = i
def __index__(self):
- del e[:]
- return 1
+ if use_clear_method:
+ e.clear()
+ else:
+ del e[:]
+ return self.i
- e = ET.Element('elem')
- e.append(ET.Element('child'))
- e[:X()] # shouldn't crash
+ for s in self.get_mutating_slices(X, 10):
+ with self.subTest(s):
+ e = ET.Element('elem')
+ e.extend([ET.Element(f'c{i}') for i in range(10)])
+ e[s] # shouldn't crash
- e.append(ET.Element('child'))
- e[0:10:X()] # shouldn't crash
+ def test_ass_subscr_with_mutating_slice(self):
+ # See https://github.com/python/cpython/issues/72050
+ # and https://github.com/python/cpython/issues/143200.
- def test_ass_subscr(self):
- # Issue #27863
class X:
+ def __init__(self, i=0):
+ self.i = i
def __index__(self):
e[:] = []
- return 1
+ return self.i
+
+ for s in self.get_mutating_slices(X, 10):
+ with self.subTest(s):
+ e = ET.Element('elem')
+ e.extend([ET.Element(f'c{i}') for i in range(10)])
+ e[s] = [] # shouldn't crash
+
+ def get_mutating_slices(self, index_class, n_children):
+ self.assertGreaterEqual(n_children, 10)
+ return [
+ slice(index_class(), None, None),
+ slice(index_class(2), None, None),
+ slice(None, index_class(), None),
+ slice(None, index_class(2), None),
+ slice(0, 2, index_class(1)),
+ slice(0, 2, index_class(2)),
+ slice(0, n_children, index_class(1)),
+ slice(0, n_children, index_class(2)),
+ slice(0, 2 * n_children, index_class(1)),
+ slice(0, 2 * n_children, index_class(2)),
+ ]
+
+ def test_ass_subscr_with_mutating_iterable_value(self):
+ class V:
+ def __iter__(self):
+ e.clear()
+ return iter([ET.Element('a'), ET.Element('b')])
e = ET.Element('elem')
- for _ in range(10):
- e.insert(0, ET.Element('child'))
-
- e[0:10:X()] = [] # shouldn't crash
+ e.extend([ET.Element(f'c{i}') for i in range(10)])
+ e[:] = V()
def test_treebuilder_start(self):
# Issue #27863
@@ -3235,7 +3308,7 @@ class ElementTreeTypeTest(unittest.TestCase):
class ElementFindTest(unittest.TestCase):
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_find_simple(self):
e = ET.XML(SAMPLE_XML)
self.assertEqual(e.find('tag').tag, 'tag')
@@ -3259,7 +3332,7 @@ class ElementFindTest(unittest.TestCase):
# Issue #16922
self.assertEqual(ET.XML('').findtext('empty'), '')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_find_xpath(self):
LINEAR_XML = '''
@@ -3282,7 +3355,7 @@ class ElementFindTest(unittest.TestCase):
self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()-0]')
self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_findall(self):
e = ET.XML(SAMPLE_XML)
e[2] = ET.XML(SAMPLE_SECTION)
@@ -3471,7 +3544,7 @@ class ElementFindTest(unittest.TestCase):
with self.assertRaisesRegex(SyntaxError, 'cannot use absolute path'):
e.findall('/tag')
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_find_through_ElementTree(self):
e = ET.XML(SAMPLE_XML)
self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag')
@@ -3674,7 +3747,7 @@ class TreeBuilderTest(unittest.TestCase):
a = parser.close()
self.assertEqual(a.text, "texttail")
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_late_tail_mix_pi_comments(self):
# Issue #37399: The tail of an ignored comment could overwrite the text before it.
# Test appending tails to comments/pis.
@@ -3787,7 +3860,7 @@ class TreeBuilderTest(unittest.TestCase):
pass
self._check_element_factory_class(MyElement)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_doctype(self):
class DoctypeParser:
_doctype = None
@@ -3865,7 +3938,7 @@ class XMLParserTest(unittest.TestCase):
parser.feed(self.sample2)
parser.close()
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_subclass_doctype(self):
_doctype = None
class MyParserWithDoctype(ET.XMLParser):
@@ -3907,7 +3980,7 @@ class XMLParserTest(unittest.TestCase):
parser.feed(self.sample2)
parser.close()
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_parse_string(self):
parser = ET.XMLParser(target=ET.TreeBuilder())
parser.feed(self.sample3)
@@ -4365,13 +4438,13 @@ class ParseErrorTest(unittest.TestCase):
except ET.ParseError as e:
return e
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_error_position(self):
self.assertEqual(self._get_error('foo').position, (1, 0))
self.assertEqual(self._get_error('&foo;').position, (1, 5))
self.assertEqual(self._get_error('foobar<').position, (1, 6))
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_error_code(self):
import xml.parsers.expat.errors as ERRORS
self.assertEqual(self._get_error('foo').code,
@@ -4431,7 +4504,7 @@ class NoAcceleratorTest(unittest.TestCase):
# --------------------------------------------------------------------
class BoolTest(unittest.TestCase):
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_warning(self):
e = ET.fromstring('')
msg = (
@@ -4461,7 +4534,7 @@ class C14NTest(unittest.TestCase):
#
# simple roundtrip tests (from c14n.py)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_simple_roundtrip(self):
# Basics
self.assertEqual(c14n_roundtrip(""), '')
@@ -4502,7 +4575,7 @@ class C14NTest(unittest.TestCase):
xml = ''
self.assertEqual(c14n_roundtrip(xml), xml)
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_c14n_exclusion(self):
xml = textwrap.dedent("""\
@@ -4583,7 +4656,7 @@ class C14NTest(unittest.TestCase):
# note that this uses generated C14N versions of the standard ET.write
# output, not roundtripped C14N (see above).
- @unittest.expectedFailure # TODO: RUSTPYTHON
+ @unittest.expectedFailure # TODO: RUSTPYTHON
def test_xml_c14n2(self):
datadir = findfile("c14n-20", subdir="xmltestdata")
full_path = partial(os.path.join, datadir)
diff --git a/Lib/xml/dom/__init__.py b/Lib/xml/dom/__init__.py
index 97cf9a642..dd7fb996a 100644
--- a/Lib/xml/dom/__init__.py
+++ b/Lib/xml/dom/__init__.py
@@ -137,4 +137,4 @@ XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"
EMPTY_NAMESPACE = None
EMPTY_PREFIX = None
-from .domreg import getDOMImplementation, registerDOMImplementation
+from .domreg import getDOMImplementation, registerDOMImplementation # noqa: F401
diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py
index db51f350e..16b33b901 100644
--- a/Lib/xml/dom/minidom.py
+++ b/Lib/xml/dom/minidom.py
@@ -292,13 +292,6 @@ def _append_child(self, node):
childNodes.append(node)
node.parentNode = self
-def _in_document(node):
- # return True iff node is part of a document tree
- while node is not None:
- if node.nodeType == Node.DOCUMENT_NODE:
- return True
- node = node.parentNode
- return False
def _write_data(writer, text, attr):
"Writes datachars to writer."
@@ -371,6 +364,7 @@ class Attr(Node):
def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
prefix=None):
self.ownerElement = None
+ self.ownerDocument = None
self._name = qName
self.namespaceURI = namespaceURI
self._prefix = prefix
@@ -696,6 +690,7 @@ class Element(Node):
def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
localName=None):
+ self.ownerDocument = None
self.parentNode = None
self.tagName = self.nodeName = tagName
self.prefix = prefix
@@ -1555,7 +1550,7 @@ def _clear_id_cache(node):
if node.nodeType == Node.DOCUMENT_NODE:
node._id_cache.clear()
node._id_search_stack = None
- elif _in_document(node):
+ elif node.ownerDocument:
node.ownerDocument._id_cache.clear()
node.ownerDocument._id_search_stack= None
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 9bb09ab54..dafe5b1b8 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -267,7 +267,11 @@ class Element:
"""
# assert iselement(element)
- self._children.remove(subelement)
+ try:
+ self._children.remove(subelement)
+ except ValueError:
+ # Re-raise with the same error message that the C implementation uses.
+ raise ValueError("Element.remove(x): element not found") from None
def find(self, path, namespaces=None):
"""Find first matching element by tag name or path.
diff --git a/Lib/xml/sax/__init__.py b/Lib/xml/sax/__init__.py
index b65731020..fe4582c6f 100644
--- a/Lib/xml/sax/__init__.py
+++ b/Lib/xml/sax/__init__.py
@@ -21,9 +21,9 @@ expatreader -- Driver that allows use of the Expat parser with SAX.
from .xmlreader import InputSource
from .handler import ContentHandler, ErrorHandler
-from ._exceptions import SAXException, SAXNotRecognizedException, \
- SAXParseException, SAXNotSupportedException, \
- SAXReaderNotAvailable
+from ._exceptions import (SAXException, SAXNotRecognizedException,
+ SAXParseException, SAXNotSupportedException,
+ SAXReaderNotAvailable)
def parse(source, handler, errorHandler=ErrorHandler()):
@@ -55,7 +55,7 @@ default_parser_list = ["xml.sax.expatreader"]
# tell modulefinder that importing sax potentially imports expatreader
_false = 0
if _false:
- import xml.sax.expatreader
+ import xml.sax.expatreader # noqa: F401
import os, sys
if not sys.flags.ignore_environment and "PY_SAX_PARSER" in os.environ:
@@ -92,3 +92,9 @@ def make_parser(parser_list=()):
def _create_parser(parser_name):
drv_module = __import__(parser_name,{},{},['create_parser'])
return drv_module.create_parser()
+
+
+__all__ = ['ContentHandler', 'ErrorHandler', 'InputSource', 'SAXException',
+ 'SAXNotRecognizedException', 'SAXNotSupportedException',
+ 'SAXParseException', 'SAXReaderNotAvailable',
+ 'default_parser_list', 'make_parser', 'parse', 'parseString']
diff --git a/Lib/xml/sax/handler.py b/Lib/xml/sax/handler.py
index e8d417e51..3183c3fe9 100644
--- a/Lib/xml/sax/handler.py
+++ b/Lib/xml/sax/handler.py
@@ -371,7 +371,7 @@ class LexicalHandler:
name is the name of the document element type, public_id the
public identifier of the DTD (or None if none were supplied)
- and system_id the system identfier of the external subset (or
+ and system_id the system identifier of the external subset (or
None if none were supplied)."""
def endDTD(self):