Add a bunch of stdlib modules from CPython

This commit is contained in:
coolreader18
2019-07-20 23:20:34 -05:00
parent 6768e3a343
commit 140d582599
45 changed files with 25681 additions and 0 deletions

140
Lib/__future__.py Normal file
View File

@@ -0,0 +1,140 @@
"""Record of phased-in incompatible language changes.
Each line is of the form:
FeatureName = "_Feature(" OptionalRelease "," MandatoryRelease ","
CompilerFlag ")"
where, normally, OptionalRelease < MandatoryRelease, and both are 5-tuples
of the same form as sys.version_info:
(PY_MAJOR_VERSION, # the 2 in 2.1.0a3; an int
PY_MINOR_VERSION, # the 1; an int
PY_MICRO_VERSION, # the 0; an int
PY_RELEASE_LEVEL, # "alpha", "beta", "candidate" or "final"; string
PY_RELEASE_SERIAL # the 3; an int
)
OptionalRelease records the first release in which
from __future__ import FeatureName
was accepted.
In the case of MandatoryReleases that have not yet occurred,
MandatoryRelease predicts the release in which the feature will become part
of the language.
Else MandatoryRelease records when the feature became part of the language;
in releases at or after that, modules no longer need
from __future__ import FeatureName
to use the feature in question, but may continue to use such imports.
MandatoryRelease may also be None, meaning that a planned feature got
dropped.
Instances of class _Feature have two corresponding methods,
.getOptionalRelease() and .getMandatoryRelease().
CompilerFlag is the (bitfield) flag that should be passed in the fourth
argument to the builtin function compile() to enable the feature in
dynamically compiled code. This flag is stored in the .compiler_flag
attribute on _Future instances. These values must match the appropriate
#defines of CO_xxx flags in Include/compile.h.
No feature line is ever to be deleted from this file.
"""
all_feature_names = [
"nested_scopes",
"generators",
"division",
"absolute_import",
"with_statement",
"print_function",
"unicode_literals",
"barry_as_FLUFL",
"generator_stop",
]
__all__ = ["all_feature_names"] + all_feature_names
# The CO_xxx symbols are defined here under the same names used by
# compile.h, so that an editor search will find them here. However,
# they're not exported in __all__, because they don't really belong to
# this module.
CO_NESTED = 0x0010 # nested_scopes
CO_GENERATOR_ALLOWED = 0 # generators (obsolete, was 0x1000)
CO_FUTURE_DIVISION = 0x2000 # division
CO_FUTURE_ABSOLUTE_IMPORT = 0x4000 # perform absolute imports by default
CO_FUTURE_WITH_STATEMENT = 0x8000 # with statement
CO_FUTURE_PRINT_FUNCTION = 0x10000 # print function
CO_FUTURE_UNICODE_LITERALS = 0x20000 # unicode string literals
CO_FUTURE_BARRY_AS_BDFL = 0x40000
CO_FUTURE_GENERATOR_STOP = 0x80000 # StopIteration becomes RuntimeError in generators
class _Feature:
def __init__(self, optionalRelease, mandatoryRelease, compiler_flag):
self.optional = optionalRelease
self.mandatory = mandatoryRelease
self.compiler_flag = compiler_flag
def getOptionalRelease(self):
"""Return first release in which this feature was recognized.
This is a 5-tuple, of the same form as sys.version_info.
"""
return self.optional
def getMandatoryRelease(self):
"""Return release in which this feature will become mandatory.
This is a 5-tuple, of the same form as sys.version_info, or, if
the feature was dropped, is None.
"""
return self.mandatory
def __repr__(self):
return "_Feature" + repr((self.optional,
self.mandatory,
self.compiler_flag))
nested_scopes = _Feature((2, 1, 0, "beta", 1),
(2, 2, 0, "alpha", 0),
CO_NESTED)
generators = _Feature((2, 2, 0, "alpha", 1),
(2, 3, 0, "final", 0),
CO_GENERATOR_ALLOWED)
division = _Feature((2, 2, 0, "alpha", 2),
(3, 0, 0, "alpha", 0),
CO_FUTURE_DIVISION)
absolute_import = _Feature((2, 5, 0, "alpha", 1),
(3, 0, 0, "alpha", 0),
CO_FUTURE_ABSOLUTE_IMPORT)
with_statement = _Feature((2, 5, 0, "alpha", 1),
(2, 6, 0, "alpha", 0),
CO_FUTURE_WITH_STATEMENT)
print_function = _Feature((2, 6, 0, "alpha", 2),
(3, 0, 0, "alpha", 0),
CO_FUTURE_PRINT_FUNCTION)
unicode_literals = _Feature((2, 6, 0, "alpha", 2),
(3, 0, 0, "alpha", 0),
CO_FUTURE_UNICODE_LITERALS)
barry_as_FLUFL = _Feature((3, 1, 0, "alpha", 2),
(3, 9, 0, "alpha", 0),
CO_FUTURE_BARRY_AS_BDFL)
generator_stop = _Feature((3, 5, 0, "beta", 1),
(3, 7, 0, "alpha", 0),
CO_FUTURE_GENERATOR_STOP)

251
Lib/_compat_pickle.py Normal file
View File

@@ -0,0 +1,251 @@
# This module is used to map the old Python 2 names to the new names used in
# Python 3 for the pickle module. This needed to make pickle streams
# generated with Python 2 loadable by Python 3.
# This is a copy of lib2to3.fixes.fix_imports.MAPPING. We cannot import
# lib2to3 and use the mapping defined there, because lib2to3 uses pickle.
# Thus, this could cause the module to be imported recursively.
IMPORT_MAPPING = {
'__builtin__' : 'builtins',
'copy_reg': 'copyreg',
'Queue': 'queue',
'SocketServer': 'socketserver',
'ConfigParser': 'configparser',
'repr': 'reprlib',
'tkFileDialog': 'tkinter.filedialog',
'tkSimpleDialog': 'tkinter.simpledialog',
'tkColorChooser': 'tkinter.colorchooser',
'tkCommonDialog': 'tkinter.commondialog',
'Dialog': 'tkinter.dialog',
'Tkdnd': 'tkinter.dnd',
'tkFont': 'tkinter.font',
'tkMessageBox': 'tkinter.messagebox',
'ScrolledText': 'tkinter.scrolledtext',
'Tkconstants': 'tkinter.constants',
'Tix': 'tkinter.tix',
'ttk': 'tkinter.ttk',
'Tkinter': 'tkinter',
'markupbase': '_markupbase',
'_winreg': 'winreg',
'thread': '_thread',
'dummy_thread': '_dummy_thread',
'dbhash': 'dbm.bsd',
'dumbdbm': 'dbm.dumb',
'dbm': 'dbm.ndbm',
'gdbm': 'dbm.gnu',
'xmlrpclib': 'xmlrpc.client',
'SimpleXMLRPCServer': 'xmlrpc.server',
'httplib': 'http.client',
'htmlentitydefs' : 'html.entities',
'HTMLParser' : 'html.parser',
'Cookie': 'http.cookies',
'cookielib': 'http.cookiejar',
'BaseHTTPServer': 'http.server',
'test.test_support': 'test.support',
'commands': 'subprocess',
'urlparse' : 'urllib.parse',
'robotparser' : 'urllib.robotparser',
'urllib2': 'urllib.request',
'anydbm': 'dbm',
'_abcoll' : 'collections.abc',
}
# This contains rename rules that are easy to handle. We ignore the more
# complex stuff (e.g. mapping the names in the urllib and types modules).
# These rules should be run before import names are fixed.
NAME_MAPPING = {
('__builtin__', 'xrange'): ('builtins', 'range'),
('__builtin__', 'reduce'): ('functools', 'reduce'),
('__builtin__', 'intern'): ('sys', 'intern'),
('__builtin__', 'unichr'): ('builtins', 'chr'),
('__builtin__', 'unicode'): ('builtins', 'str'),
('__builtin__', 'long'): ('builtins', 'int'),
('itertools', 'izip'): ('builtins', 'zip'),
('itertools', 'imap'): ('builtins', 'map'),
('itertools', 'ifilter'): ('builtins', 'filter'),
('itertools', 'ifilterfalse'): ('itertools', 'filterfalse'),
('itertools', 'izip_longest'): ('itertools', 'zip_longest'),
('UserDict', 'IterableUserDict'): ('collections', 'UserDict'),
('UserList', 'UserList'): ('collections', 'UserList'),
('UserString', 'UserString'): ('collections', 'UserString'),
('whichdb', 'whichdb'): ('dbm', 'whichdb'),
('_socket', 'fromfd'): ('socket', 'fromfd'),
('_multiprocessing', 'Connection'): ('multiprocessing.connection', 'Connection'),
('multiprocessing.process', 'Process'): ('multiprocessing.context', 'Process'),
('multiprocessing.forking', 'Popen'): ('multiprocessing.popen_fork', 'Popen'),
('urllib', 'ContentTooShortError'): ('urllib.error', 'ContentTooShortError'),
('urllib', 'getproxies'): ('urllib.request', 'getproxies'),
('urllib', 'pathname2url'): ('urllib.request', 'pathname2url'),
('urllib', 'quote_plus'): ('urllib.parse', 'quote_plus'),
('urllib', 'quote'): ('urllib.parse', 'quote'),
('urllib', 'unquote_plus'): ('urllib.parse', 'unquote_plus'),
('urllib', 'unquote'): ('urllib.parse', 'unquote'),
('urllib', 'url2pathname'): ('urllib.request', 'url2pathname'),
('urllib', 'urlcleanup'): ('urllib.request', 'urlcleanup'),
('urllib', 'urlencode'): ('urllib.parse', 'urlencode'),
('urllib', 'urlopen'): ('urllib.request', 'urlopen'),
('urllib', 'urlretrieve'): ('urllib.request', 'urlretrieve'),
('urllib2', 'HTTPError'): ('urllib.error', 'HTTPError'),
('urllib2', 'URLError'): ('urllib.error', 'URLError'),
}
PYTHON2_EXCEPTIONS = (
"ArithmeticError",
"AssertionError",
"AttributeError",
"BaseException",
"BufferError",
"BytesWarning",
"DeprecationWarning",
"EOFError",
"EnvironmentError",
"Exception",
"FloatingPointError",
"FutureWarning",
"GeneratorExit",
"IOError",
"ImportError",
"ImportWarning",
"IndentationError",
"IndexError",
"KeyError",
"KeyboardInterrupt",
"LookupError",
"MemoryError",
"NameError",
"NotImplementedError",
"OSError",
"OverflowError",
"PendingDeprecationWarning",
"ReferenceError",
"RuntimeError",
"RuntimeWarning",
# StandardError is gone in Python 3, so we map it to Exception
"StopIteration",
"SyntaxError",
"SyntaxWarning",
"SystemError",
"SystemExit",
"TabError",
"TypeError",
"UnboundLocalError",
"UnicodeDecodeError",
"UnicodeEncodeError",
"UnicodeError",
"UnicodeTranslateError",
"UnicodeWarning",
"UserWarning",
"ValueError",
"Warning",
"ZeroDivisionError",
)
try:
WindowsError
except NameError:
pass
else:
PYTHON2_EXCEPTIONS += ("WindowsError",)
for excname in PYTHON2_EXCEPTIONS:
NAME_MAPPING[("exceptions", excname)] = ("builtins", excname)
MULTIPROCESSING_EXCEPTIONS = (
'AuthenticationError',
'BufferTooShort',
'ProcessError',
'TimeoutError',
)
for excname in MULTIPROCESSING_EXCEPTIONS:
NAME_MAPPING[("multiprocessing", excname)] = ("multiprocessing.context", excname)
# Same, but for 3.x to 2.x
REVERSE_IMPORT_MAPPING = dict((v, k) for (k, v) in IMPORT_MAPPING.items())
assert len(REVERSE_IMPORT_MAPPING) == len(IMPORT_MAPPING)
REVERSE_NAME_MAPPING = dict((v, k) for (k, v) in NAME_MAPPING.items())
assert len(REVERSE_NAME_MAPPING) == len(NAME_MAPPING)
# Non-mutual mappings.
IMPORT_MAPPING.update({
'cPickle': 'pickle',
'_elementtree': 'xml.etree.ElementTree',
'FileDialog': 'tkinter.filedialog',
'SimpleDialog': 'tkinter.simpledialog',
'DocXMLRPCServer': 'xmlrpc.server',
'SimpleHTTPServer': 'http.server',
'CGIHTTPServer': 'http.server',
# For compatibility with broken pickles saved in old Python 3 versions
'UserDict': 'collections',
'UserList': 'collections',
'UserString': 'collections',
'whichdb': 'dbm',
'StringIO': 'io',
'cStringIO': 'io',
})
REVERSE_IMPORT_MAPPING.update({
'_bz2': 'bz2',
'_dbm': 'dbm',
'_functools': 'functools',
'_gdbm': 'gdbm',
'_pickle': 'pickle',
})
NAME_MAPPING.update({
('__builtin__', 'basestring'): ('builtins', 'str'),
('exceptions', 'StandardError'): ('builtins', 'Exception'),
('UserDict', 'UserDict'): ('collections', 'UserDict'),
('socket', '_socketobject'): ('socket', 'SocketType'),
})
REVERSE_NAME_MAPPING.update({
('_functools', 'reduce'): ('__builtin__', 'reduce'),
('tkinter.filedialog', 'FileDialog'): ('FileDialog', 'FileDialog'),
('tkinter.filedialog', 'LoadFileDialog'): ('FileDialog', 'LoadFileDialog'),
('tkinter.filedialog', 'SaveFileDialog'): ('FileDialog', 'SaveFileDialog'),
('tkinter.simpledialog', 'SimpleDialog'): ('SimpleDialog', 'SimpleDialog'),
('xmlrpc.server', 'ServerHTMLDoc'): ('DocXMLRPCServer', 'ServerHTMLDoc'),
('xmlrpc.server', 'XMLRPCDocGenerator'):
('DocXMLRPCServer', 'XMLRPCDocGenerator'),
('xmlrpc.server', 'DocXMLRPCRequestHandler'):
('DocXMLRPCServer', 'DocXMLRPCRequestHandler'),
('xmlrpc.server', 'DocXMLRPCServer'):
('DocXMLRPCServer', 'DocXMLRPCServer'),
('xmlrpc.server', 'DocCGIXMLRPCRequestHandler'):
('DocXMLRPCServer', 'DocCGIXMLRPCRequestHandler'),
('http.server', 'SimpleHTTPRequestHandler'):
('SimpleHTTPServer', 'SimpleHTTPRequestHandler'),
('http.server', 'CGIHTTPRequestHandler'):
('CGIHTTPServer', 'CGIHTTPRequestHandler'),
('_socket', 'socket'): ('socket', '_socketobject'),
})
PYTHON3_OSERROR_EXCEPTIONS = (
'BrokenPipeError',
'ChildProcessError',
'ConnectionAbortedError',
'ConnectionError',
'ConnectionRefusedError',
'ConnectionResetError',
'FileExistsError',
'FileNotFoundError',
'InterruptedError',
'IsADirectoryError',
'NotADirectoryError',
'PermissionError',
'ProcessLookupError',
'TimeoutError',
)
for excname in PYTHON3_OSERROR_EXCEPTIONS:
REVERSE_NAME_MAPPING[('builtins', excname)] = ('exceptions', 'OSError')
PYTHON3_IMPORTERROR_EXCEPTIONS = (
'ModuleNotFoundError',
)
for excname in PYTHON3_IMPORTERROR_EXCEPTIONS:
REVERSE_NAME_MAPPING[('builtins', excname)] = ('exceptions', 'ImportError')

163
Lib/_dummy_thread.py Normal file
View File

@@ -0,0 +1,163 @@
"""Drop-in replacement for the thread module.
Meant to be used as a brain-dead substitute so that threaded code does
not need to be rewritten for when the thread module is not present.
Suggested usage is::
try:
import _thread
except ImportError:
import _dummy_thread as _thread
"""
# Exports only things specified by thread documentation;
# skipping obsolete synonyms allocate(), start_new(), exit_thread().
__all__ = ['error', 'start_new_thread', 'exit', 'get_ident', 'allocate_lock',
'interrupt_main', 'LockType']
# A dummy value
TIMEOUT_MAX = 2**31
# NOTE: this module can be imported early in the extension building process,
# and so top level imports of other modules should be avoided. Instead, all
# imports are done when needed on a function-by-function basis. Since threads
# are disabled, the import lock should not be an issue anyway (??).
error = RuntimeError
def start_new_thread(function, args, kwargs={}):
"""Dummy implementation of _thread.start_new_thread().
Compatibility is maintained by making sure that ``args`` is a
tuple and ``kwargs`` is a dictionary. If an exception is raised
and it is SystemExit (which can be done by _thread.exit()) it is
caught and nothing is done; all other exceptions are printed out
by using traceback.print_exc().
If the executed function calls interrupt_main the KeyboardInterrupt will be
raised when the function returns.
"""
if type(args) != type(tuple()):
raise TypeError("2nd arg must be a tuple")
if type(kwargs) != type(dict()):
raise TypeError("3rd arg must be a dict")
global _main
_main = False
try:
function(*args, **kwargs)
except SystemExit:
pass
except:
import traceback
traceback.print_exc()
_main = True
global _interrupt
if _interrupt:
_interrupt = False
raise KeyboardInterrupt
def exit():
"""Dummy implementation of _thread.exit()."""
raise SystemExit
def get_ident():
"""Dummy implementation of _thread.get_ident().
Since this module should only be used when _threadmodule is not
available, it is safe to assume that the current process is the
only thread. Thus a constant can be safely returned.
"""
return -1
def allocate_lock():
"""Dummy implementation of _thread.allocate_lock()."""
return LockType()
def stack_size(size=None):
"""Dummy implementation of _thread.stack_size()."""
if size is not None:
raise error("setting thread stack size not supported")
return 0
def _set_sentinel():
"""Dummy implementation of _thread._set_sentinel()."""
return LockType()
class LockType(object):
"""Class implementing dummy implementation of _thread.LockType.
Compatibility is maintained by maintaining self.locked_status
which is a boolean that stores the state of the lock. Pickling of
the lock, though, should not be done since if the _thread module is
then used with an unpickled ``lock()`` from here problems could
occur from this class not having atomic methods.
"""
def __init__(self):
self.locked_status = False
def acquire(self, waitflag=None, timeout=-1):
"""Dummy implementation of acquire().
For blocking calls, self.locked_status is automatically set to
True and returned appropriately based on value of
``waitflag``. If it is non-blocking, then the value is
actually checked and not set if it is already acquired. This
is all done so that threading.Condition's assert statements
aren't triggered and throw a little fit.
"""
if waitflag is None or waitflag:
self.locked_status = True
return True
else:
if not self.locked_status:
self.locked_status = True
return True
else:
if timeout > 0:
import time
time.sleep(timeout)
return False
__enter__ = acquire
def __exit__(self, typ, val, tb):
self.release()
def release(self):
"""Release the dummy lock."""
# XXX Perhaps shouldn't actually bother to test? Could lead
# to problems for complex, threaded code.
if not self.locked_status:
raise error
self.locked_status = False
return True
def locked(self):
return self.locked_status
def __repr__(self):
return "<%s %s.%s object at %s>" % (
"locked" if self.locked_status else "unlocked",
self.__class__.__module__,
self.__class__.__qualname__,
hex(id(self))
)
# Used to signal that interrupt_main was called in a "thread"
_interrupt = False
# True when not executing in a "thread"
_main = True
def interrupt_main():
"""Set _interrupt flag to True to have start_new_thread raise
KeyboardInterrupt upon exiting."""
if _main:
raise KeyboardInterrupt
else:
global _interrupt
_interrupt = True

395
Lib/_markupbase.py Normal file
View File

@@ -0,0 +1,395 @@
"""Shared support for scanning document type declarations in HTML and XHTML.
This module is used as a foundation for the html.parser module. It has no
documented public API and should not be used directly.
"""
import re
_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
_commentclose = re.compile(r'--\s*>')
_markedsectionclose = re.compile(r']\s*]\s*>')
# An analysis of the MS-Word extensions is available at
# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf
_msmarkedsectionclose = re.compile(r']\s*>')
del re
class ParserBase:
"""Parser base class which provides some common support methods used
by the SGML/HTML and XHTML parsers."""
def __init__(self):
if self.__class__ is ParserBase:
raise RuntimeError(
"_markupbase.ParserBase must be subclassed")
def error(self, message):
raise NotImplementedError(
"subclasses of ParserBase must override error()")
def reset(self):
self.lineno = 1
self.offset = 0
def getpos(self):
"""Return current line number and offset."""
return self.lineno, self.offset
# Internal -- update line number and offset. This should be
# called for each piece of data exactly once, in order -- in other
# words the concatenation of all the input strings to this
# function should be exactly the entire input.
def updatepos(self, i, j):
if i >= j:
return j
rawdata = self.rawdata
nlines = rawdata.count("\n", i, j)
if nlines:
self.lineno = self.lineno + nlines
pos = rawdata.rindex("\n", i, j) # Should not fail
self.offset = j-(pos+1)
else:
self.offset = self.offset + j-i
return j
_decl_otherchars = ''
# Internal -- parse declaration (for use by subclasses).
def parse_declaration(self, i):
# This is some sort of declaration; in "HTML as
# deployed," this should only be the document type
# declaration ("<!DOCTYPE html...>").
# ISO 8879:1986, however, has more complex
# declaration syntax for elements in <!...>, including:
# --comment--
# [marked section]
# name in the following list: ENTITY, DOCTYPE, ELEMENT,
# ATTLIST, NOTATION, SHORTREF, USEMAP,
# LINKTYPE, LINK, IDLINK, USELINK, SYSTEM
rawdata = self.rawdata
j = i + 2
assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
if rawdata[j:j+1] == ">":
# the empty comment <!>
return j + 1
if rawdata[j:j+1] in ("-", ""):
# Start of comment followed by buffer boundary,
# or just a buffer boundary.
return -1
# A simple, practical version could look like: ((name|stringlit) S*) + '>'
n = len(rawdata)
if rawdata[j:j+2] == '--': #comment
# Locate --.*-- as the body of the comment
return self.parse_comment(i)
elif rawdata[j] == '[': #marked section
# Locate [statusWord [...arbitrary SGML...]] as the body of the marked section
# Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA
# Note that this is extended by Microsoft Office "Save as Web" function
# to include [if...] and [endif].
return self.parse_marked_section(i)
else: #all other declaration elements
decltype, j = self._scan_name(j, i)
if j < 0:
return j
if decltype == "doctype":
self._decl_otherchars = ''
while j < n:
c = rawdata[j]
if c == ">":
# end of declaration syntax
data = rawdata[i+2:j]
if decltype == "doctype":
self.handle_decl(data)
else:
# According to the HTML5 specs sections "8.2.4.44 Bogus
# comment state" and "8.2.4.45 Markup declaration open
# state", a comment token should be emitted.
# Calling unknown_decl provides more flexibility though.
self.unknown_decl(data)
return j + 1
if c in "\"'":
m = _declstringlit_match(rawdata, j)
if not m:
return -1 # incomplete
j = m.end()
elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
name, j = self._scan_name(j, i)
elif c in self._decl_otherchars:
j = j + 1
elif c == "[":
# this could be handled in a separate doctype parser
if decltype == "doctype":
j = self._parse_doctype_subset(j + 1, i)
elif decltype in {"attlist", "linktype", "link", "element"}:
# must tolerate []'d groups in a content model in an element declaration
# also in data attribute specifications of attlist declaration
# also link type declaration subsets in linktype declarations
# also link attribute specification lists in link declarations
self.error("unsupported '[' char in %s declaration" % decltype)
else:
self.error("unexpected '[' char in declaration")
else:
self.error(
"unexpected %r char in declaration" % rawdata[j])
if j < 0:
return j
return -1 # incomplete
# Internal -- parse a marked section
# Override this to handle MS-word extension syntax <![if word]>content<![endif]>
def parse_marked_section(self, i, report=1):
rawdata= self.rawdata
assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()"
sectName, j = self._scan_name( i+3, i )
if j < 0:
return j
if sectName in {"temp", "cdata", "ignore", "include", "rcdata"}:
# look for standard ]]> ending
match= _markedsectionclose.search(rawdata, i+3)
elif sectName in {"if", "else", "endif"}:
# look for MS Office ]> ending
match= _msmarkedsectionclose.search(rawdata, i+3)
else:
self.error('unknown status keyword %r in marked section' % rawdata[i+3:j])
if not match:
return -1
if report:
j = match.start(0)
self.unknown_decl(rawdata[i+3: j])
return match.end(0)
# Internal -- parse comment, return length or -1 if not terminated
def parse_comment(self, i, report=1):
rawdata = self.rawdata
if rawdata[i:i+4] != '<!--':
self.error('unexpected call to parse_comment()')
match = _commentclose.search(rawdata, i+4)
if not match:
return -1
if report:
j = match.start(0)
self.handle_comment(rawdata[i+4: j])
return match.end(0)
# Internal -- scan past the internal subset in a <!DOCTYPE declaration,
# returning the index just past any whitespace following the trailing ']'.
def _parse_doctype_subset(self, i, declstartpos):
rawdata = self.rawdata
n = len(rawdata)
j = i
while j < n:
c = rawdata[j]
if c == "<":
s = rawdata[j:j+2]
if s == "<":
# end of buffer; incomplete
return -1
if s != "<!":
self.updatepos(declstartpos, j + 1)
self.error("unexpected char in internal subset (in %r)" % s)
if (j + 2) == n:
# end of buffer; incomplete
return -1
if (j + 4) > n:
# end of buffer; incomplete
return -1
if rawdata[j:j+4] == "<!--":
j = self.parse_comment(j, report=0)
if j < 0:
return j
continue
name, j = self._scan_name(j + 2, declstartpos)
if j == -1:
return -1
if name not in {"attlist", "element", "entity", "notation"}:
self.updatepos(declstartpos, j + 2)
self.error(
"unknown declaration %r in internal subset" % name)
# handle the individual names
meth = getattr(self, "_parse_doctype_" + name)
j = meth(j, declstartpos)
if j < 0:
return j
elif c == "%":
# parameter entity reference
if (j + 1) == n:
# end of buffer; incomplete
return -1
s, j = self._scan_name(j + 1, declstartpos)
if j < 0:
return j
if rawdata[j] == ";":
j = j + 1
elif c == "]":
j = j + 1
while j < n and rawdata[j].isspace():
j = j + 1
if j < n:
if rawdata[j] == ">":
return j
self.updatepos(declstartpos, j)
self.error("unexpected char after internal subset")
else:
return -1
elif c.isspace():
j = j + 1
else:
self.updatepos(declstartpos, j)
self.error("unexpected char %r in internal subset" % c)
# end of buffer reached
return -1
# Internal -- scan past <!ELEMENT declarations
def _parse_doctype_element(self, i, declstartpos):
name, j = self._scan_name(i, declstartpos)
if j == -1:
return -1
# style content model; just skip until '>'
rawdata = self.rawdata
if '>' in rawdata[j:]:
return rawdata.find(">", j) + 1
return -1
# Internal -- scan past <!ATTLIST declarations
def _parse_doctype_attlist(self, i, declstartpos):
rawdata = self.rawdata
name, j = self._scan_name(i, declstartpos)
c = rawdata[j:j+1]
if c == "":
return -1
if c == ">":
return j + 1
while 1:
# scan a series of attribute descriptions; simplified:
# name type [value] [#constraint]
name, j = self._scan_name(j, declstartpos)
if j < 0:
return j
c = rawdata[j:j+1]
if c == "":
return -1
if c == "(":
# an enumerated type; look for ')'
if ")" in rawdata[j:]:
j = rawdata.find(")", j) + 1
else:
return -1
while rawdata[j:j+1].isspace():
j = j + 1
if not rawdata[j:]:
# end of buffer, incomplete
return -1
else:
name, j = self._scan_name(j, declstartpos)
c = rawdata[j:j+1]
if not c:
return -1
if c in "'\"":
m = _declstringlit_match(rawdata, j)
if m:
j = m.end()
else:
return -1
c = rawdata[j:j+1]
if not c:
return -1
if c == "#":
if rawdata[j:] == "#":
# end of buffer
return -1
name, j = self._scan_name(j + 1, declstartpos)
if j < 0:
return j
c = rawdata[j:j+1]
if not c:
return -1
if c == '>':
# all done
return j + 1
# Internal -- scan past <!NOTATION declarations
def _parse_doctype_notation(self, i, declstartpos):
name, j = self._scan_name(i, declstartpos)
if j < 0:
return j
rawdata = self.rawdata
while 1:
c = rawdata[j:j+1]
if not c:
# end of buffer; incomplete
return -1
if c == '>':
return j + 1
if c in "'\"":
m = _declstringlit_match(rawdata, j)
if not m:
return -1
j = m.end()
else:
name, j = self._scan_name(j, declstartpos)
if j < 0:
return j
# Internal -- scan past <!ENTITY declarations
def _parse_doctype_entity(self, i, declstartpos):
rawdata = self.rawdata
if rawdata[i:i+1] == "%":
j = i + 1
while 1:
c = rawdata[j:j+1]
if not c:
return -1
if c.isspace():
j = j + 1
else:
break
else:
j = i
name, j = self._scan_name(j, declstartpos)
if j < 0:
return j
while 1:
c = self.rawdata[j:j+1]
if not c:
return -1
if c in "'\"":
m = _declstringlit_match(rawdata, j)
if m:
j = m.end()
else:
return -1 # incomplete
elif c == ">":
return j + 1
else:
name, j = self._scan_name(j, declstartpos)
if j < 0:
return j
# Internal -- scan a name token and the new position and the token, or
# return -1 if we've reached the end of the buffer.
def _scan_name(self, i, declstartpos):
rawdata = self.rawdata
n = len(rawdata)
if i == n:
return None, -1
m = _declname_match(rawdata, i)
if m:
s = m.group()
name = s.strip()
if (i + len(s)) == n:
return None, -1 # end of buffer
return name.lower(), m.end()
else:
self.updatepos(declstartpos, i)
self.error("expected name token at %r"
% rawdata[declstartpos:declstartpos+20])
# To be overridden -- handlers for unknown objects
def unknown_decl(self, data):
pass

103
Lib/_sitebuiltins.py Normal file
View File

@@ -0,0 +1,103 @@
"""
The objects used by the site module to add custom builtins.
"""
# Those objects are almost immortal and they keep a reference to their module
# globals. Defining them in the site module would keep too many references
# alive.
# Note this means this module should also avoid keep things alive in its
# globals.
import sys
class Quitter(object):
def __init__(self, name, eof):
self.name = name
self.eof = eof
def __repr__(self):
return 'Use %s() or %s to exit' % (self.name, self.eof)
def __call__(self, code=None):
# Shells like IDLE catch the SystemExit, but listen when their
# stdin wrapper is closed.
try:
sys.stdin.close()
except:
pass
raise SystemExit(code)
class _Printer(object):
"""interactive prompt objects for printing the license text, a list of
contributors and the copyright notice."""
MAXLINES = 23
def __init__(self, name, data, files=(), dirs=()):
import os
self.__name = name
self.__data = data
self.__lines = None
self.__filenames = [os.path.join(dir, filename)
for dir in dirs
for filename in files]
def __setup(self):
if self.__lines:
return
data = None
for filename in self.__filenames:
try:
with open(filename, "r") as fp:
data = fp.read()
break
except OSError:
pass
if not data:
data = self.__data
self.__lines = data.split('\n')
self.__linecnt = len(self.__lines)
def __repr__(self):
self.__setup()
if len(self.__lines) <= self.MAXLINES:
return "\n".join(self.__lines)
else:
return "Type %s() to see the full %s text" % ((self.__name,)*2)
def __call__(self):
self.__setup()
prompt = 'Hit Return for more, or q (and Return) to quit: '
lineno = 0
while 1:
try:
for i in range(lineno, lineno + self.MAXLINES):
print(self.__lines[i])
except IndexError:
break
else:
lineno += self.MAXLINES
key = None
while key is None:
key = input(prompt)
if key not in ('', 'q'):
key = None
if key == 'q':
break
class _Helper(object):
"""Define the builtin 'help'.
This is a wrapper around pydoc.help that provides a helpful message
when 'help' is typed at the Python interactive prompt.
Calling help() at the Python prompt starts an interactive help session.
Calling help(thing) prints help for the python object 'thing'.
"""
def __repr__(self):
return "Type help() for interactive help, " \
"or help(object) for help about object."
def __call__(self, *args, **kwds):
import pydoc
return pydoc.help(*args, **kwds)

595
Lib/base64.py Executable file
View File

@@ -0,0 +1,595 @@
#! /usr/bin/python3.6
"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""
# Modified 04-Oct-1995 by Jack Jansen to use binascii module
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
import re
import struct
import binascii
__all__ = [
# Legacy interface exports traditional RFC 2045 Base64 encodings
'encode', 'decode', 'encodebytes', 'decodebytes',
# Generalized interface for other encodings
'b64encode', 'b64decode', 'b32encode', 'b32decode',
'b16encode', 'b16decode',
# Base85 and Ascii85 encodings
'b85encode', 'b85decode', 'a85encode', 'a85decode',
# Standard Base64 encoding
'standard_b64encode', 'standard_b64decode',
# Some common Base64 alternatives. As referenced by RFC 3458, see thread
# starting at:
#
# http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
'urlsafe_b64encode', 'urlsafe_b64decode',
]
bytes_types = (bytes, bytearray) # Types acceptable as binary data
def _bytes_from_decode_data(s):
if isinstance(s, str):
try:
return s.encode('ascii')
except UnicodeEncodeError:
raise ValueError('string argument should contain only ASCII characters')
if isinstance(s, bytes_types):
return s
try:
return memoryview(s).tobytes()
except TypeError:
raise TypeError("argument should be a bytes-like object or ASCII "
"string, not %r" % s.__class__.__name__) from None
# Base64 encoding/decoding uses binascii
def b64encode(s, altchars=None):
"""Encode the bytes-like object s using Base64 and return a bytes object.
Optional altchars should be a byte string of length 2 which specifies an
alternative alphabet for the '+' and '/' characters. This allows an
application to e.g. generate url or filesystem safe Base64 strings.
"""
encoded = binascii.b2a_base64(s, newline=False)
if altchars is not None:
assert len(altchars) == 2, repr(altchars)
return encoded.translate(bytes.maketrans(b'+/', altchars))
return encoded
def b64decode(s, altchars=None, validate=False):
"""Decode the Base64 encoded bytes-like object or ASCII string s.
Optional altchars must be a bytes-like object or ASCII string of length 2
which specifies the alternative alphabet used instead of the '+' and '/'
characters.
The result is returned as a bytes object. A binascii.Error is raised if
s is incorrectly padded.
If validate is False (the default), characters that are neither in the
normal base-64 alphabet nor the alternative alphabet are discarded prior
to the padding check. If validate is True, these non-alphabet characters
in the input result in a binascii.Error.
"""
s = _bytes_from_decode_data(s)
if altchars is not None:
altchars = _bytes_from_decode_data(altchars)
assert len(altchars) == 2, repr(altchars)
s = s.translate(bytes.maketrans(altchars, b'+/'))
if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s):
raise binascii.Error('Non-base64 digit found')
return binascii.a2b_base64(s)
def standard_b64encode(s):
"""Encode bytes-like object s using the standard Base64 alphabet.
The result is returned as a bytes object.
"""
return b64encode(s)
def standard_b64decode(s):
"""Decode bytes encoded with the standard Base64 alphabet.
Argument s is a bytes-like object or ASCII string to decode. The result
is returned as a bytes object. A binascii.Error is raised if the input
is incorrectly padded. Characters that are not in the standard alphabet
are discarded prior to the padding check.
"""
return b64decode(s)
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')
def urlsafe_b64encode(s):
"""Encode bytes using the URL- and filesystem-safe Base64 alphabet.
Argument s is a bytes-like object to encode. The result is returned as a
bytes object. The alphabet uses '-' instead of '+' and '_' instead of
'/'.
"""
return b64encode(s).translate(_urlsafe_encode_translation)
def urlsafe_b64decode(s):
"""Decode bytes using the URL- and filesystem-safe Base64 alphabet.
Argument s is a bytes-like object or ASCII string to decode. The result
is returned as a bytes object. A binascii.Error is raised if the input
is incorrectly padded. Characters that are not in the URL-safe base-64
alphabet, and are not a plus '+' or slash '/', are discarded prior to the
padding check.
The alphabet uses '-' instead of '+' and '_' instead of '/'.
"""
s = _bytes_from_decode_data(s)
s = s.translate(_urlsafe_decode_translation)
return b64decode(s)
# Base32 encoding/decoding must be done in Python
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
_b32tab2 = None
_b32rev = None
def b32encode(s):
"""Encode the bytes-like object s using Base32 and return a bytes object.
"""
global _b32tab2
# Delay the initialization of the table to not waste memory
# if the function is never called
if _b32tab2 is None:
b32tab = [bytes((i,)) for i in _b32alphabet]
_b32tab2 = [a + b for a in b32tab for b in b32tab]
b32tab = None
if not isinstance(s, bytes_types):
s = memoryview(s).tobytes()
leftover = len(s) % 5
# Pad the last quantum with zero bits if necessary
if leftover:
s = s + b'\0' * (5 - leftover) # Don't use += !
encoded = bytearray()
from_bytes = int.from_bytes
b32tab2 = _b32tab2
for i in range(0, len(s), 5):
c = from_bytes(s[i: i + 5], 'big')
encoded += (b32tab2[c >> 30] + # bits 1 - 10
b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20
b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30
b32tab2[c & 0x3ff] # bits 31 - 40
)
# Adjust for any leftover partial quanta
if leftover == 1:
encoded[-6:] = b'======'
elif leftover == 2:
encoded[-4:] = b'===='
elif leftover == 3:
encoded[-3:] = b'==='
elif leftover == 4:
encoded[-1:] = b'='
return bytes(encoded)
def b32decode(s, casefold=False, map01=None):
"""Decode the Base32 encoded bytes-like object or ASCII string s.
Optional casefold is a flag specifying whether a lowercase alphabet is
acceptable as input. For security purposes, the default is False.
RFC 3548 allows for optional mapping of the digit 0 (zero) to the
letter O (oh), and for optional mapping of the digit 1 (one) to
either the letter I (eye) or letter L (el). The optional argument
map01 when not None, specifies which letter the digit 1 should be
mapped to (when map01 is not None, the digit 0 is always mapped to
the letter O). For security purposes the default is None, so that
0 and 1 are not allowed in the input.
The result is returned as a bytes object. A binascii.Error is raised if
the input is incorrectly padded or if there are non-alphabet
characters present in the input.
"""
global _b32rev
# Delay the initialization of the table to not waste memory
# if the function is never called
if _b32rev is None:
_b32rev = {v: k for k, v in enumerate(_b32alphabet)}
s = _bytes_from_decode_data(s)
if len(s) % 8:
raise binascii.Error('Incorrect padding')
# Handle section 2.4 zero and one mapping. The flag map01 will be either
# False, or the character to map the digit 1 (one) to. It should be
# either L (el) or I (eye).
if map01 is not None:
map01 = _bytes_from_decode_data(map01)
assert len(map01) == 1, repr(map01)
s = s.translate(bytes.maketrans(b'01', b'O' + map01))
if casefold:
s = s.upper()
# Strip off pad characters from the right. We need to count the pad
# characters because this will tell us how many null bytes to remove from
# the end of the decoded string.
l = len(s)
s = s.rstrip(b'=')
padchars = l - len(s)
# Now decode the full quanta
decoded = bytearray()
b32rev = _b32rev
for i in range(0, len(s), 8):
quanta = s[i: i + 8]
acc = 0
try:
for c in quanta:
acc = (acc << 5) + b32rev[c]
except KeyError:
raise binascii.Error('Non-base32 digit found') from None
decoded += acc.to_bytes(5, 'big')
# Process the last, partial quanta
if l % 8 or padchars not in {0, 1, 3, 4, 6}:
raise binascii.Error('Incorrect padding')
if padchars and decoded:
acc <<= 5 * padchars
last = acc.to_bytes(5, 'big')
leftover = (43 - 5 * padchars) // 8 # 1: 4, 3: 3, 4: 2, 6: 1
decoded[-5:] = last[:leftover]
return bytes(decoded)
# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
# lowercase. The RFC also recommends against accepting input case
# insensitively.
def b16encode(s):
"""Encode the bytes-like object s using Base16 and return a bytes object.
"""
return binascii.hexlify(s).upper()
def b16decode(s, casefold=False):
"""Decode the Base16 encoded bytes-like object or ASCII string s.
Optional casefold is a flag specifying whether a lowercase alphabet is
acceptable as input. For security purposes, the default is False.
The result is returned as a bytes object. A binascii.Error is raised if
s is incorrectly padded or if there are non-alphabet characters present
in the input.
"""
s = _bytes_from_decode_data(s)
if casefold:
s = s.upper()
if re.search(b'[^0-9A-F]', s):
raise binascii.Error('Non-base16 digit found')
return binascii.unhexlify(s)
#
# Ascii85 encoding/decoding
#
_a85chars = None
_a85chars2 = None
_A85START = b"<~"
_A85END = b"~>"
def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
# Helper function for a85encode and b85encode
if not isinstance(b, bytes_types):
b = memoryview(b).tobytes()
padding = (-len(b)) % 4
if padding:
b = b + b'\0' * padding
words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)
chunks = [b'z' if foldnuls and not word else
b'y' if foldspaces and word == 0x20202020 else
(chars2[word // 614125] +
chars2[word // 85 % 7225] +
chars[word % 85])
for word in words]
if padding and not pad:
if chunks[-1] == b'z':
chunks[-1] = chars[0] * 5
chunks[-1] = chunks[-1][:-padding]
return b''.join(chunks)
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
"""Encode bytes-like object b using Ascii85 and return a bytes object.
foldspaces is an optional flag that uses the special short sequence 'y'
instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This
feature is not supported by the "standard" Adobe encoding.
wrapcol controls whether the output should have newline (b'\\n') characters
added to it. If this is non-zero, each output line will be at most this
many characters long.
pad controls whether the input is padded to a multiple of 4 before
encoding. Note that the btoa implementation always pads.
adobe controls whether the encoded byte sequence is framed with <~ and ~>,
which is used by the Adobe implementation.
"""
global _a85chars, _a85chars2
# Delay the initialization of tables to not waste memory
# if the function is never called
if _a85chars is None:
_a85chars = [bytes((i,)) for i in range(33, 118)]
_a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]
result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)
if adobe:
result = _A85START + result
if wrapcol:
wrapcol = max(2 if adobe else 1, wrapcol)
chunks = [result[i: i + wrapcol]
for i in range(0, len(result), wrapcol)]
if adobe:
if len(chunks[-1]) + 2 > wrapcol:
chunks.append(b'')
result = b'\n'.join(chunks)
if adobe:
result += _A85END
return result
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
"""Decode the Ascii85 encoded bytes-like object or ASCII string b.
foldspaces is a flag that specifies whether the 'y' short sequence should be
accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is
not supported by the "standard" Adobe encoding.
adobe controls whether the input sequence is in Adobe Ascii85 format (i.e.
is framed with <~ and ~>).
ignorechars should be a byte string containing characters to ignore from the
input. This should only contain whitespace characters, and by default
contains all whitespace characters in ASCII.
The result is returned as a bytes object.
"""
b = _bytes_from_decode_data(b)
if adobe:
if not b.endswith(_A85END):
raise ValueError(
"Ascii85 encoded byte sequences must end "
"with {!r}".format(_A85END)
)
if b.startswith(_A85START):
b = b[2:-2] # Strip off start/end markers
else:
b = b[:-2]
#
# We have to go through this stepwise, so as to ignore spaces and handle
# special short sequences
#
packI = struct.Struct('!I').pack
decoded = []
decoded_append = decoded.append
curr = []
curr_append = curr.append
curr_clear = curr.clear
for x in b + b'u' * 4:
if b'!'[0] <= x <= b'u'[0]:
curr_append(x)
if len(curr) == 5:
acc = 0
for x in curr:
acc = 85 * acc + (x - 33)
try:
decoded_append(packI(acc))
except struct.error:
raise ValueError('Ascii85 overflow') from None
curr_clear()
elif x == b'z'[0]:
if curr:
raise ValueError('z inside Ascii85 5-tuple')
decoded_append(b'\0\0\0\0')
elif foldspaces and x == b'y'[0]:
if curr:
raise ValueError('y inside Ascii85 5-tuple')
decoded_append(b'\x20\x20\x20\x20')
elif x in ignorechars:
# Skip whitespace
continue
else:
raise ValueError('Non-Ascii85 digit found: %c' % x)
result = b''.join(decoded)
padding = 4 - len(curr)
if padding:
# Throw away the extra padding
result = result[:-padding]
return result
# The following code is originally taken (with permission) from Mercurial
_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
_b85chars = None
_b85chars2 = None
_b85dec = None
def b85encode(b, pad=False):
"""Encode bytes-like object b in base85 format and return a bytes object.
If pad is true, the input is padded with b'\\0' so its length is a multiple of
4 bytes before encoding.
"""
global _b85chars, _b85chars2
# Delay the initialization of tables to not waste memory
# if the function is never called
if _b85chars is None:
_b85chars = [bytes((i,)) for i in _b85alphabet]
_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
return _85encode(b, _b85chars, _b85chars2, pad)
def b85decode(b):
"""Decode the base85-encoded bytes-like object or ASCII string b
The result is returned as a bytes object.
"""
global _b85dec
# Delay the initialization of tables to not waste memory
# if the function is never called
if _b85dec is None:
_b85dec = [None] * 256
for i, c in enumerate(_b85alphabet):
_b85dec[c] = i
b = _bytes_from_decode_data(b)
padding = (-len(b)) % 5
b = b + b'~' * padding
out = []
packI = struct.Struct('!I').pack
for i in range(0, len(b), 5):
chunk = b[i:i + 5]
acc = 0
try:
for c in chunk:
acc = acc * 85 + _b85dec[c]
except TypeError:
for j, c in enumerate(chunk):
if _b85dec[c] is None:
raise ValueError('bad base85 character at position %d'
% (i + j)) from None
raise
try:
out.append(packI(acc))
except struct.error:
raise ValueError('base85 overflow in hunk starting at byte %d'
% i) from None
result = b''.join(out)
if padding:
result = result[:-padding]
return result
# Legacy interface. This code could be cleaned up since I don't believe
# binascii has any line length limitations. It just doesn't seem worth it
# though. The files should be opened in binary mode.
MAXLINESIZE = 76 # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE//4)*3
def encode(input, output):
"""Encode a file; input and output are binary files."""
while True:
s = input.read(MAXBINSIZE)
if not s:
break
while len(s) < MAXBINSIZE:
ns = input.read(MAXBINSIZE-len(s))
if not ns:
break
s += ns
line = binascii.b2a_base64(s)
output.write(line)
def decode(input, output):
"""Decode a file; input and output are binary files."""
while True:
line = input.readline()
if not line:
break
s = binascii.a2b_base64(line)
output.write(s)
def _input_type_check(s):
try:
m = memoryview(s)
except TypeError as err:
msg = "expected bytes-like object, not %s" % s.__class__.__name__
raise TypeError(msg) from err
if m.format not in ('c', 'b', 'B'):
msg = ("expected single byte elements, not %r from %s" %
(m.format, s.__class__.__name__))
raise TypeError(msg)
if m.ndim != 1:
msg = ("expected 1-D data, not %d-D data from %s" %
(m.ndim, s.__class__.__name__))
raise TypeError(msg)
def encodebytes(s):
"""Encode a bytestring into a bytes object containing multiple lines
of base-64 data."""
_input_type_check(s)
pieces = []
for i in range(0, len(s), MAXBINSIZE):
chunk = s[i : i + MAXBINSIZE]
pieces.append(binascii.b2a_base64(chunk))
return b"".join(pieces)
def encodestring(s):
"""Legacy alias of encodebytes()."""
import warnings
warnings.warn("encodestring() is a deprecated alias since 3.1, "
"use encodebytes()",
DeprecationWarning, 2)
return encodebytes(s)
def decodebytes(s):
"""Decode a bytestring of base-64 data into a bytes object."""
_input_type_check(s)
return binascii.a2b_base64(s)
def decodestring(s):
"""Legacy alias of decodebytes()."""
import warnings
warnings.warn("decodestring() is a deprecated alias since Python 3.1, "
"use decodebytes()",
DeprecationWarning, 2)
return decodebytes(s)
# Usable as a script...
def main():
"""Small main program"""
import sys, getopt
try:
opts, args = getopt.getopt(sys.argv[1:], 'deut')
except getopt.error as msg:
sys.stdout = sys.stderr
print(msg)
print("""usage: %s [-d|-e|-u|-t] [file|-]
-d, -u: decode
-e: encode (default)
-t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0])
sys.exit(2)
func = encode
for o, a in opts:
if o == '-e': func = encode
if o == '-d': func = decode
if o == '-u': func = decode
if o == '-t': test(); return
if args and args[0] != '-':
with open(args[0], 'rb') as f:
func(f, sys.stdout.buffer)
else:
func(sys.stdin.buffer, sys.stdout.buffer)
def test():
s0 = b"Aladdin:open sesame"
print(repr(s0))
s1 = encodebytes(s0)
print(repr(s1))
s2 = decodebytes(s1)
print(repr(s2))
assert s0 == s2
if __name__ == '__main__':
main()

92
Lib/bisect.py Normal file
View File

@@ -0,0 +1,92 @@
"""Bisection algorithms."""
def insort_right(a, x, lo=0, hi=None):
"""Insert item x in list a, and keep it sorted assuming a is sorted.
If x is already in a, insert it to the right of the rightmost x.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if x < a[mid]: hi = mid
else: lo = mid+1
a.insert(lo, x)
insort = insort_right # backward compatibility
def bisect_right(a, x, lo=0, hi=None):
"""Return the index where to insert item x in list a, assuming a is sorted.
The return value i is such that all e in a[:i] have e <= x, and all e in
a[i:] have e > x. So if x already appears in the list, a.insert(x) will
insert just after the rightmost x already there.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if x < a[mid]: hi = mid
else: lo = mid+1
return lo
bisect = bisect_right # backward compatibility
def insort_left(a, x, lo=0, hi=None):
"""Insert item x in list a, and keep it sorted assuming a is sorted.
If x is already in a, insert it to the left of the leftmost x.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if a[mid] < x: lo = mid+1
else: hi = mid
a.insert(lo, x)
def bisect_left(a, x, lo=0, hi=None):
"""Return the index where to insert item x in list a, assuming a is sorted.
The return value i is such that all e in a[:i] have e < x, and all e in
a[i:] have e >= x. So if x already appears in the list, a.insert(x) will
insert just before the leftmost x already there.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if a[mid] < x: lo = mid+1
else: hi = mid
return lo
# Overwrite above definitions with a fast C implementation
try:
from _bisect import *
except ImportError:
pass

169
Lib/chunk.py Normal file
View File

@@ -0,0 +1,169 @@
"""Simple class to read IFF chunks.
An IFF chunk (used in formats such as AIFF, TIFF, RMFF (RealMedia File
Format)) has the following structure:
+----------------+
| ID (4 bytes) |
+----------------+
| size (4 bytes) |
+----------------+
| data |
| ... |
+----------------+
The ID is a 4-byte string which identifies the type of chunk.
The size field (a 32-bit value, encoded using big-endian byte order)
gives the size of the whole chunk, including the 8-byte header.
Usually an IFF-type file consists of one or more chunks. The proposed
usage of the Chunk class defined here is to instantiate an instance at
the start of each chunk and read from the instance until it reaches
the end, after which a new instance can be instantiated. At the end
of the file, creating a new instance will fail with an EOFError
exception.
Usage:
while True:
try:
chunk = Chunk(file)
except EOFError:
break
chunktype = chunk.getname()
while True:
data = chunk.read(nbytes)
if not data:
pass
# do something with data
The interface is file-like. The implemented methods are:
read, close, seek, tell, isatty.
Extra methods are: skip() (called by close, skips to the end of the chunk),
getname() (returns the name (ID) of the chunk)
The __init__ method has one required argument, a file-like object
(including a chunk instance), and one optional argument, a flag which
specifies whether or not chunks are aligned on 2-byte boundaries. The
default is 1, i.e. aligned.
"""
class Chunk:
def __init__(self, file, align=True, bigendian=True, inclheader=False):
import struct
self.closed = False
self.align = align # whether to align to word (2-byte) boundaries
if bigendian:
strflag = '>'
else:
strflag = '<'
self.file = file
self.chunkname = file.read(4)
if len(self.chunkname) < 4:
raise EOFError
try:
self.chunksize = struct.unpack_from(strflag+'L', file.read(4))[0]
except struct.error:
raise EOFError
if inclheader:
self.chunksize = self.chunksize - 8 # subtract header
self.size_read = 0
try:
self.offset = self.file.tell()
except (AttributeError, OSError):
self.seekable = False
else:
self.seekable = True
def getname(self):
"""Return the name (ID) of the current chunk."""
return self.chunkname
def getsize(self):
"""Return the size of the current chunk."""
return self.chunksize
def close(self):
if not self.closed:
try:
self.skip()
finally:
self.closed = True
def isatty(self):
if self.closed:
raise ValueError("I/O operation on closed file")
return False
def seek(self, pos, whence=0):
"""Seek to specified position into the chunk.
Default position is 0 (start of chunk).
If the file is not seekable, this will result in an error.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
if not self.seekable:
raise OSError("cannot seek")
if whence == 1:
pos = pos + self.size_read
elif whence == 2:
pos = pos + self.chunksize
if pos < 0 or pos > self.chunksize:
raise RuntimeError
self.file.seek(self.offset + pos, 0)
self.size_read = pos
def tell(self):
if self.closed:
raise ValueError("I/O operation on closed file")
return self.size_read
def read(self, size=-1):
"""Read at most size bytes from the chunk.
If size is omitted or negative, read until the end
of the chunk.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
if self.size_read >= self.chunksize:
return b''
if size < 0:
size = self.chunksize - self.size_read
if size > self.chunksize - self.size_read:
size = self.chunksize - self.size_read
data = self.file.read(size)
self.size_read = self.size_read + len(data)
if self.size_read == self.chunksize and \
self.align and \
(self.chunksize & 1):
dummy = self.file.read(1)
self.size_read = self.size_read + len(dummy)
return data
def skip(self):
"""Skip the rest of the chunk.
If you are not interested in the contents of the chunk,
this method should be called so that the file points to
the start of the next chunk.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
if self.seekable:
try:
n = self.chunksize - self.size_read
# maybe fix alignment
if self.align and (self.chunksize & 1):
n = n + 1
self.file.seek(n, 1)
self.size_read = self.size_read + n
return
except OSError:
pass
while self.size_read < self.chunksize:
n = min(8192, self.chunksize - self.size_read)
dummy = self.read(n)
if not dummy:
raise EOFError

168
Lib/codeop.py Normal file
View File

@@ -0,0 +1,168 @@
r"""Utilities to compile possibly incomplete Python source code.
This module provides two interfaces, broadly similar to the builtin
function compile(), which take program text, a filename and a 'mode'
and:
- Return code object if the command is complete and valid
- Return None if the command is incomplete
- Raise SyntaxError, ValueError or OverflowError if the command is a
syntax error (OverflowError and ValueError can be produced by
malformed literals).
Approach:
First, check if the source consists entirely of blank lines and
comments; if so, replace it with 'pass', because the built-in
parser doesn't always do the right thing for these.
Compile three times: as is, with \n, and with \n\n appended. If it
compiles as is, it's complete. If it compiles with one \n appended,
we expect more. If it doesn't compile either way, we compare the
error we get when compiling with \n or \n\n appended. If the errors
are the same, the code is broken. But if the errors are different, we
expect more. Not intuitive; not even guaranteed to hold in future
releases; but this matches the compiler's behavior from Python 1.4
through 2.2, at least.
Caveat:
It is possible (but not likely) that the parser stops parsing with a
successful outcome before reaching the end of the source; in this
case, trailing symbols may be ignored instead of causing an error.
For example, a backslash followed by two newlines may be followed by
arbitrary garbage. This will be fixed once the API for the parser is
better.
The two interfaces are:
compile_command(source, filename, symbol):
Compiles a single command in the manner described above.
CommandCompiler():
Instances of this class have __call__ methods identical in
signature to compile_command; the difference is that if the
instance compiles program text containing a __future__ statement,
the instance 'remembers' and compiles all subsequent program texts
with the statement in force.
The module also provides another class:
Compile():
Instances of this class act like the built-in function compile,
but with 'memory' in the sense described above.
"""
import __future__
_features = [getattr(__future__, fname)
for fname in __future__.all_feature_names]
__all__ = ["compile_command", "Compile", "CommandCompiler"]
PyCF_DONT_IMPLY_DEDENT = 0x200 # Matches pythonrun.h
def _maybe_compile(compiler, source, filename, symbol):
# Check for source consisting of only blank lines and comments
for line in source.split("\n"):
line = line.strip()
if line and line[0] != '#':
break # Leave it alone
else:
if symbol != "eval":
source = "pass" # Replace it with a 'pass' statement
err = err1 = err2 = None
code = code1 = code2 = None
try:
code = compiler(source, filename, symbol)
except SyntaxError as err:
pass
try:
code1 = compiler(source + "\n", filename, symbol)
except SyntaxError as e:
err1 = e
try:
code2 = compiler(source + "\n\n", filename, symbol)
except SyntaxError as e:
err2 = e
if code:
return code
if not code1 and repr(err1) == repr(err2):
raise err1
def _compile(source, filename, symbol):
return compile(source, filename, symbol, PyCF_DONT_IMPLY_DEDENT)
def compile_command(source, filename="<input>", symbol="single"):
r"""Compile a command and determine whether it is incomplete.
Arguments:
source -- the source string; may contain \n characters
filename -- optional filename from which source was read; default
"<input>"
symbol -- optional grammar start symbol; "single" (default) or "eval"
Return value / exceptions raised:
- Return a code object if the command is complete and valid
- Return None if the command is incomplete
- Raise SyntaxError, ValueError or OverflowError if the command is a
syntax error (OverflowError and ValueError can be produced by
malformed literals).
"""
return _maybe_compile(_compile, source, filename, symbol)
class Compile:
"""Instances of this class behave much like the built-in compile
function, but if one is used to compile text containing a future
statement, it "remembers" and compiles all subsequent program texts
with the statement in force."""
def __init__(self):
self.flags = PyCF_DONT_IMPLY_DEDENT
def __call__(self, source, filename, symbol):
codeob = compile(source, filename, symbol, self.flags, 1)
for feature in _features:
if codeob.co_flags & feature.compiler_flag:
self.flags |= feature.compiler_flag
return codeob
class CommandCompiler:
"""Instances of this class have __call__ methods identical in
signature to compile_command; the difference is that if the
instance compiles program text containing a __future__ statement,
the instance 'remembers' and compiles all subsequent program texts
with the statement in force."""
def __init__(self,):
self.compiler = Compile()
def __call__(self, source, filename="<input>", symbol="single"):
r"""Compile a command and determine whether it is incomplete.
Arguments:
source -- the source string; may contain \n characters
filename -- optional filename from which source was read;
default "<input>"
symbol -- optional grammar start symbol; "single" (default) or
"eval"
Return value / exceptions raised:
- Return a code object if the command is complete and valid
- Return None if the command is incomplete
- Raise SyntaxError, ValueError or OverflowError if the command is a
syntax error (OverflowError and ValueError can be produced by
malformed literals).
"""
return _maybe_compile(self.compiler, source, filename, symbol)

206
Lib/copyreg.py Normal file
View File

@@ -0,0 +1,206 @@
"""Helper to provide extensibility for pickle.
This is only useful to add pickle support for extension types defined in
C, not for instances of user-defined classes.
"""
__all__ = ["pickle", "constructor",
"add_extension", "remove_extension", "clear_extension_cache"]
dispatch_table = {}
def pickle(ob_type, pickle_function, constructor_ob=None):
if not callable(pickle_function):
raise TypeError("reduction functions must be callable")
dispatch_table[ob_type] = pickle_function
# The constructor_ob function is a vestige of safe for unpickling.
# There is no reason for the caller to pass it anymore.
if constructor_ob is not None:
constructor(constructor_ob)
def constructor(object):
if not callable(object):
raise TypeError("constructors must be callable")
# Example: provide pickling support for complex numbers.
try:
complex
except NameError:
pass
else:
def pickle_complex(c):
return complex, (c.real, c.imag)
pickle(complex, pickle_complex, complex)
# Support for pickling new-style objects
def _reconstructor(cls, base, state):
if base is object:
obj = object.__new__(cls)
else:
obj = base.__new__(cls, state)
if base.__init__ != object.__init__:
base.__init__(obj, state)
return obj
_HEAPTYPE = 1<<9
# Python code for object.__reduce_ex__ for protocols 0 and 1
def _reduce_ex(self, proto):
assert proto < 2
for base in self.__class__.__mro__:
if hasattr(base, '__flags__') and not base.__flags__ & _HEAPTYPE:
break
else:
base = object # not really reachable
if base is object:
state = None
else:
if base is self.__class__:
raise TypeError("can't pickle %s objects" % base.__name__)
state = base(self)
args = (self.__class__, base, state)
try:
getstate = self.__getstate__
except AttributeError:
if getattr(self, "__slots__", None):
raise TypeError("a class that defines __slots__ without "
"defining __getstate__ cannot be pickled")
try:
dict = self.__dict__
except AttributeError:
dict = None
else:
dict = getstate()
if dict:
return _reconstructor, args, dict
else:
return _reconstructor, args
# Helper for __reduce_ex__ protocol 2
def __newobj__(cls, *args):
return cls.__new__(cls, *args)
def __newobj_ex__(cls, args, kwargs):
"""Used by pickle protocol 4, instead of __newobj__ to allow classes with
keyword-only arguments to be pickled correctly.
"""
return cls.__new__(cls, *args, **kwargs)
def _slotnames(cls):
"""Return a list of slot names for a given class.
This needs to find slots defined by the class and its bases, so we
can't simply return the __slots__ attribute. We must walk down
the Method Resolution Order and concatenate the __slots__ of each
class found there. (This assumes classes don't modify their
__slots__ attribute to misrepresent their slots after the class is
defined.)
"""
# Get the value from a cache in the class if possible
names = cls.__dict__.get("__slotnames__")
if names is not None:
return names
# Not cached -- calculate the value
names = []
if not hasattr(cls, "__slots__"):
# This class has no slots
pass
else:
# Slots found -- gather slot names from all base classes
for c in cls.__mro__:
if "__slots__" in c.__dict__:
slots = c.__dict__['__slots__']
# if class has a single slot, it can be given as a string
if isinstance(slots, str):
slots = (slots,)
for name in slots:
# special descriptors
if name in ("__dict__", "__weakref__"):
continue
# mangled names
elif name.startswith('__') and not name.endswith('__'):
stripped = c.__name__.lstrip('_')
if stripped:
names.append('_%s%s' % (stripped, name))
else:
names.append(name)
else:
names.append(name)
# Cache the outcome in the class if at all possible
try:
cls.__slotnames__ = names
except:
pass # But don't die if we can't
return names
# A registry of extension codes. This is an ad-hoc compression
# mechanism. Whenever a global reference to <module>, <name> is about
# to be pickled, the (<module>, <name>) tuple is looked up here to see
# if it is a registered extension code for it. Extension codes are
# universal, so that the meaning of a pickle does not depend on
# context. (There are also some codes reserved for local use that
# don't have this restriction.) Codes are positive ints; 0 is
# reserved.
_extension_registry = {} # key -> code
_inverted_registry = {} # code -> key
_extension_cache = {} # code -> object
# Don't ever rebind those names: pickling grabs a reference to them when
# it's initialized, and won't see a rebinding.
def add_extension(module, name, code):
"""Register an extension code."""
code = int(code)
if not 1 <= code <= 0x7fffffff:
raise ValueError("code out of range")
key = (module, name)
if (_extension_registry.get(key) == code and
_inverted_registry.get(code) == key):
return # Redundant registrations are benign
if key in _extension_registry:
raise ValueError("key %s is already registered with code %s" %
(key, _extension_registry[key]))
if code in _inverted_registry:
raise ValueError("code %s is already in use for key %s" %
(code, _inverted_registry[code]))
_extension_registry[key] = code
_inverted_registry[code] = key
def remove_extension(module, name, code):
"""Unregister an extension code. For testing only."""
key = (module, name)
if (_extension_registry.get(key) != code or
_inverted_registry.get(code) != key):
raise ValueError("key %s is not registered with code %s" %
(key, code))
del _extension_registry[key]
del _inverted_registry[code]
if code in _extension_cache:
del _extension_cache[code]
def clear_extension_cache():
_extension_cache.clear()
# Standard extension code assignments
# Reserved ranges
# First Last Count Purpose
# 1 127 127 Reserved for Python standard library
# 128 191 64 Reserved for Zope
# 192 239 48 Reserved for 3rd parties
# 240 255 16 Reserved for private use (will never be assigned)
# 256 Inf Inf Reserved for future assignment
# Extension codes are assigned by the Python Software Foundation.

452
Lib/formatter.py Normal file
View File

@@ -0,0 +1,452 @@
"""Generic output formatting.
Formatter objects transform an abstract flow of formatting events into
specific output events on writer objects. Formatters manage several stack
structures to allow various properties of a writer object to be changed and
restored; writers need not be able to handle relative changes nor any sort
of ``change back'' operation. Specific writer properties which may be
controlled via formatter objects are horizontal alignment, font, and left
margin indentations. A mechanism is provided which supports providing
arbitrary, non-exclusive style settings to a writer as well. Additional
interfaces facilitate formatting events which are not reversible, such as
paragraph separation.
Writer objects encapsulate device interfaces. Abstract devices, such as
file formats, are supported as well as physical devices. The provided
implementations all work with abstract devices. The interface makes
available mechanisms for setting the properties which formatter objects
manage and inserting data into the output.
"""
import sys
import warnings
warnings.warn('the formatter module is deprecated', DeprecationWarning,
stacklevel=2)
AS_IS = None
class NullFormatter:
"""A formatter which does nothing.
If the writer parameter is omitted, a NullWriter instance is created.
No methods of the writer are called by NullFormatter instances.
Implementations should inherit from this class if implementing a writer
interface but don't need to inherit any implementation.
"""
def __init__(self, writer=None):
if writer is None:
writer = NullWriter()
self.writer = writer
def end_paragraph(self, blankline): pass
def add_line_break(self): pass
def add_hor_rule(self, *args, **kw): pass
def add_label_data(self, format, counter, blankline=None): pass
def add_flowing_data(self, data): pass
def add_literal_data(self, data): pass
def flush_softspace(self): pass
def push_alignment(self, align): pass
def pop_alignment(self): pass
def push_font(self, x): pass
def pop_font(self): pass
def push_margin(self, margin): pass
def pop_margin(self): pass
def set_spacing(self, spacing): pass
def push_style(self, *styles): pass
def pop_style(self, n=1): pass
def assert_line_data(self, flag=1): pass
class AbstractFormatter:
"""The standard formatter.
This implementation has demonstrated wide applicability to many writers,
and may be used directly in most circumstances. It has been used to
implement a full-featured World Wide Web browser.
"""
# Space handling policy: blank spaces at the boundary between elements
# are handled by the outermost context. "Literal" data is not checked
# to determine context, so spaces in literal data are handled directly
# in all circumstances.
def __init__(self, writer):
self.writer = writer # Output device
self.align = None # Current alignment
self.align_stack = [] # Alignment stack
self.font_stack = [] # Font state
self.margin_stack = [] # Margin state
self.spacing = None # Vertical spacing state
self.style_stack = [] # Other state, e.g. color
self.nospace = 1 # Should leading space be suppressed
self.softspace = 0 # Should a space be inserted
self.para_end = 1 # Just ended a paragraph
self.parskip = 0 # Skipped space between paragraphs?
self.hard_break = 1 # Have a hard break
self.have_label = 0
def end_paragraph(self, blankline):
if not self.hard_break:
self.writer.send_line_break()
self.have_label = 0
if self.parskip < blankline and not self.have_label:
self.writer.send_paragraph(blankline - self.parskip)
self.parskip = blankline
self.have_label = 0
self.hard_break = self.nospace = self.para_end = 1
self.softspace = 0
def add_line_break(self):
if not (self.hard_break or self.para_end):
self.writer.send_line_break()
self.have_label = self.parskip = 0
self.hard_break = self.nospace = 1
self.softspace = 0
def add_hor_rule(self, *args, **kw):
if not self.hard_break:
self.writer.send_line_break()
self.writer.send_hor_rule(*args, **kw)
self.hard_break = self.nospace = 1
self.have_label = self.para_end = self.softspace = self.parskip = 0
def add_label_data(self, format, counter, blankline = None):
if self.have_label or not self.hard_break:
self.writer.send_line_break()
if not self.para_end:
self.writer.send_paragraph((blankline and 1) or 0)
if isinstance(format, str):
self.writer.send_label_data(self.format_counter(format, counter))
else:
self.writer.send_label_data(format)
self.nospace = self.have_label = self.hard_break = self.para_end = 1
self.softspace = self.parskip = 0
def format_counter(self, format, counter):
label = ''
for c in format:
if c == '1':
label = label + ('%d' % counter)
elif c in 'aA':
if counter > 0:
label = label + self.format_letter(c, counter)
elif c in 'iI':
if counter > 0:
label = label + self.format_roman(c, counter)
else:
label = label + c
return label
def format_letter(self, case, counter):
label = ''
while counter > 0:
counter, x = divmod(counter-1, 26)
# This makes a strong assumption that lowercase letters
# and uppercase letters form two contiguous blocks, with
# letters in order!
s = chr(ord(case) + x)
label = s + label
return label
def format_roman(self, case, counter):
ones = ['i', 'x', 'c', 'm']
fives = ['v', 'l', 'd']
label, index = '', 0
# This will die of IndexError when counter is too big
while counter > 0:
counter, x = divmod(counter, 10)
if x == 9:
label = ones[index] + ones[index+1] + label
elif x == 4:
label = ones[index] + fives[index] + label
else:
if x >= 5:
s = fives[index]
x = x-5
else:
s = ''
s = s + ones[index]*x
label = s + label
index = index + 1
if case == 'I':
return label.upper()
return label
def add_flowing_data(self, data):
if not data: return
prespace = data[:1].isspace()
postspace = data[-1:].isspace()
data = " ".join(data.split())
if self.nospace and not data:
return
elif prespace or self.softspace:
if not data:
if not self.nospace:
self.softspace = 1
self.parskip = 0
return
if not self.nospace:
data = ' ' + data
self.hard_break = self.nospace = self.para_end = \
self.parskip = self.have_label = 0
self.softspace = postspace
self.writer.send_flowing_data(data)
def add_literal_data(self, data):
if not data: return
if self.softspace:
self.writer.send_flowing_data(" ")
self.hard_break = data[-1:] == '\n'
self.nospace = self.para_end = self.softspace = \
self.parskip = self.have_label = 0
self.writer.send_literal_data(data)
def flush_softspace(self):
if self.softspace:
self.hard_break = self.para_end = self.parskip = \
self.have_label = self.softspace = 0
self.nospace = 1
self.writer.send_flowing_data(' ')
def push_alignment(self, align):
if align and align != self.align:
self.writer.new_alignment(align)
self.align = align
self.align_stack.append(align)
else:
self.align_stack.append(self.align)
def pop_alignment(self):
if self.align_stack:
del self.align_stack[-1]
if self.align_stack:
self.align = align = self.align_stack[-1]
self.writer.new_alignment(align)
else:
self.align = None
self.writer.new_alignment(None)
def push_font(self, font):
size, i, b, tt = font
if self.softspace:
self.hard_break = self.para_end = self.softspace = 0
self.nospace = 1
self.writer.send_flowing_data(' ')
if self.font_stack:
csize, ci, cb, ctt = self.font_stack[-1]
if size is AS_IS: size = csize
if i is AS_IS: i = ci
if b is AS_IS: b = cb
if tt is AS_IS: tt = ctt
font = (size, i, b, tt)
self.font_stack.append(font)
self.writer.new_font(font)
def pop_font(self):
if self.font_stack:
del self.font_stack[-1]
if self.font_stack:
font = self.font_stack[-1]
else:
font = None
self.writer.new_font(font)
def push_margin(self, margin):
self.margin_stack.append(margin)
fstack = [m for m in self.margin_stack if m]
if not margin and fstack:
margin = fstack[-1]
self.writer.new_margin(margin, len(fstack))
def pop_margin(self):
if self.margin_stack:
del self.margin_stack[-1]
fstack = [m for m in self.margin_stack if m]
if fstack:
margin = fstack[-1]
else:
margin = None
self.writer.new_margin(margin, len(fstack))
def set_spacing(self, spacing):
self.spacing = spacing
self.writer.new_spacing(spacing)
def push_style(self, *styles):
if self.softspace:
self.hard_break = self.para_end = self.softspace = 0
self.nospace = 1
self.writer.send_flowing_data(' ')
for style in styles:
self.style_stack.append(style)
self.writer.new_styles(tuple(self.style_stack))
def pop_style(self, n=1):
del self.style_stack[-n:]
self.writer.new_styles(tuple(self.style_stack))
def assert_line_data(self, flag=1):
self.nospace = self.hard_break = not flag
self.para_end = self.parskip = self.have_label = 0
class NullWriter:
"""Minimal writer interface to use in testing & inheritance.
A writer which only provides the interface definition; no actions are
taken on any methods. This should be the base class for all writers
which do not need to inherit any implementation methods.
"""
def __init__(self): pass
def flush(self): pass
def new_alignment(self, align): pass
def new_font(self, font): pass
def new_margin(self, margin, level): pass
def new_spacing(self, spacing): pass
def new_styles(self, styles): pass
def send_paragraph(self, blankline): pass
def send_line_break(self): pass
def send_hor_rule(self, *args, **kw): pass
def send_label_data(self, data): pass
def send_flowing_data(self, data): pass
def send_literal_data(self, data): pass
class AbstractWriter(NullWriter):
"""A writer which can be used in debugging formatters, but not much else.
Each method simply announces itself by printing its name and
arguments on standard output.
"""
def new_alignment(self, align):
print("new_alignment(%r)" % (align,))
def new_font(self, font):
print("new_font(%r)" % (font,))
def new_margin(self, margin, level):
print("new_margin(%r, %d)" % (margin, level))
def new_spacing(self, spacing):
print("new_spacing(%r)" % (spacing,))
def new_styles(self, styles):
print("new_styles(%r)" % (styles,))
def send_paragraph(self, blankline):
print("send_paragraph(%r)" % (blankline,))
def send_line_break(self):
print("send_line_break()")
def send_hor_rule(self, *args, **kw):
print("send_hor_rule()")
def send_label_data(self, data):
print("send_label_data(%r)" % (data,))
def send_flowing_data(self, data):
print("send_flowing_data(%r)" % (data,))
def send_literal_data(self, data):
print("send_literal_data(%r)" % (data,))
class DumbWriter(NullWriter):
"""Simple writer class which writes output on the file object passed in
as the file parameter or, if file is omitted, on standard output. The
output is simply word-wrapped to the number of columns specified by
the maxcol parameter. This class is suitable for reflowing a sequence
of paragraphs.
"""
def __init__(self, file=None, maxcol=72):
self.file = file or sys.stdout
self.maxcol = maxcol
NullWriter.__init__(self)
self.reset()
def reset(self):
self.col = 0
self.atbreak = 0
def send_paragraph(self, blankline):
self.file.write('\n'*blankline)
self.col = 0
self.atbreak = 0
def send_line_break(self):
self.file.write('\n')
self.col = 0
self.atbreak = 0
def send_hor_rule(self, *args, **kw):
self.file.write('\n')
self.file.write('-'*self.maxcol)
self.file.write('\n')
self.col = 0
self.atbreak = 0
def send_literal_data(self, data):
self.file.write(data)
i = data.rfind('\n')
if i >= 0:
self.col = 0
data = data[i+1:]
data = data.expandtabs()
self.col = self.col + len(data)
self.atbreak = 0
def send_flowing_data(self, data):
if not data: return
atbreak = self.atbreak or data[0].isspace()
col = self.col
maxcol = self.maxcol
write = self.file.write
for word in data.split():
if atbreak:
if col + len(word) >= maxcol:
write('\n')
col = 0
else:
write(' ')
col = col + 1
write(word)
col = col + len(word)
atbreak = 1
self.col = col
self.atbreak = data[-1].isspace()
def test(file = None):
w = DumbWriter()
f = AbstractFormatter(w)
if file is not None:
fp = open(file)
elif sys.argv[1:]:
fp = open(sys.argv[1])
else:
fp = sys.stdin
try:
for line in fp:
if line == '\n':
f.end_paragraph(1)
else:
f.add_flowing_data(line)
finally:
if fp is not sys.stdin:
fp.close()
f.end_paragraph(0)
if __name__ == '__main__':
test()

68
Lib/nturl2path.py Normal file
View File

@@ -0,0 +1,68 @@
"""Convert a NT pathname to a file URL and vice versa."""
def url2pathname(url):
"""OS-specific conversion from a relative URL of the 'file' scheme
to a file system path; not recommended for general use."""
# e.g.
# ///C|/foo/bar/spam.foo
# and
# ///C:/foo/bar/spam.foo
# become
# C:\foo\bar\spam.foo
import string, urllib.parse
# Windows itself uses ":" even in URLs.
url = url.replace(':', '|')
if not '|' in url:
# No drive specifier, just convert slashes
if url[:4] == '////':
# path is something like ////host/path/on/remote/host
# convert this to \\host\path\on\remote\host
# (notice halving of slashes at the start of the path)
url = url[2:]
components = url.split('/')
# make sure not to convert quoted slashes :-)
return urllib.parse.unquote('\\'.join(components))
comp = url.split('|')
if len(comp) != 2 or comp[0][-1] not in string.ascii_letters:
error = 'Bad URL: ' + url
raise OSError(error)
drive = comp[0][-1].upper()
components = comp[1].split('/')
path = drive + ':'
for comp in components:
if comp:
path = path + '\\' + urllib.parse.unquote(comp)
# Issue #11474 - handing url such as |c/|
if path.endswith(':') and url.endswith('/'):
path += '\\'
return path
def pathname2url(p):
"""OS-specific conversion from a file system path to a relative URL
of the 'file' scheme; not recommended for general use."""
# e.g.
# C:\foo\bar\spam.foo
# becomes
# ///C:/foo/bar/spam.foo
import urllib.parse
if not ':' in p:
# No drive specifier, just convert slashes and quote the name
if p[:2] == '\\\\':
# path is something like \\host\path\on\remote\host
# convert this to ////host/path/on/remote/host
# (notice doubling of slashes at the start of the path)
p = '\\\\' + p
components = p.split('\\')
return urllib.parse.quote('/'.join(components))
comp = p.split(':')
if len(comp) != 2 or len(comp[0]) > 1:
error = 'Bad path: ' + p
raise OSError(error)
drive = urllib.parse.quote(comp[0].upper())
components = comp[1].split('\\')
path = '///' + drive + ':'
for comp in components:
if comp:
path = path + '/' + urllib.parse.quote(comp)
return path

View File

View File

@@ -0,0 +1,6 @@
/*
CSS file for pydoc.
Contents of this file are subject to change without notice.
*/

13062
Lib/pydoc_data/topics.py Normal file

File diff suppressed because it is too large Load Diff

242
Lib/quopri.py Executable file
View File

@@ -0,0 +1,242 @@
#! /usr/bin/python3.6
"""Conversions to/from quoted-printable transport encoding as per RFC 1521."""
# (Dec 1991 version).
__all__ = ["encode", "decode", "encodestring", "decodestring"]
ESCAPE = b'='
MAXLINESIZE = 76
HEX = b'0123456789ABCDEF'
EMPTYSTRING = b''
try:
from binascii import a2b_qp, b2a_qp
except ImportError:
a2b_qp = None
b2a_qp = None
def needsquoting(c, quotetabs, header):
"""Decide whether a particular byte ordinal needs to be quoted.
The 'quotetabs' flag indicates whether embedded tabs and spaces should be
quoted. Note that line-ending tabs and spaces are always encoded, as per
RFC 1521.
"""
assert isinstance(c, bytes)
if c in b' \t':
return quotetabs
# if header, we have to escape _ because _ is used to escape space
if c == b'_':
return header
return c == ESCAPE or not (b' ' <= c <= b'~')
def quote(c):
"""Quote a single character."""
assert isinstance(c, bytes) and len(c)==1
c = ord(c)
return ESCAPE + bytes((HEX[c//16], HEX[c%16]))
def encode(input, output, quotetabs, header=False):
"""Read 'input', apply quoted-printable encoding, and write to 'output'.
'input' and 'output' are binary file objects. The 'quotetabs' flag
indicates whether embedded tabs and spaces should be quoted. Note that
line-ending tabs and spaces are always encoded, as per RFC 1521.
The 'header' flag indicates whether we are encoding spaces as _ as per RFC
1522."""
if b2a_qp is not None:
data = input.read()
odata = b2a_qp(data, quotetabs=quotetabs, header=header)
output.write(odata)
return
def write(s, output=output, lineEnd=b'\n'):
# RFC 1521 requires that the line ending in a space or tab must have
# that trailing character encoded.
if s and s[-1:] in b' \t':
output.write(s[:-1] + quote(s[-1:]) + lineEnd)
elif s == b'.':
output.write(quote(s) + lineEnd)
else:
output.write(s + lineEnd)
prevline = None
while 1:
line = input.readline()
if not line:
break
outline = []
# Strip off any readline induced trailing newline
stripped = b''
if line[-1:] == b'\n':
line = line[:-1]
stripped = b'\n'
# Calculate the un-length-limited encoded line
for c in line:
c = bytes((c,))
if needsquoting(c, quotetabs, header):
c = quote(c)
if header and c == b' ':
outline.append(b'_')
else:
outline.append(c)
# First, write out the previous line
if prevline is not None:
write(prevline)
# Now see if we need any soft line breaks because of RFC-imposed
# length limitations. Then do the thisline->prevline dance.
thisline = EMPTYSTRING.join(outline)
while len(thisline) > MAXLINESIZE:
# Don't forget to include the soft line break `=' sign in the
# length calculation!
write(thisline[:MAXLINESIZE-1], lineEnd=b'=\n')
thisline = thisline[MAXLINESIZE-1:]
# Write out the current line
prevline = thisline
# Write out the last line, without a trailing newline
if prevline is not None:
write(prevline, lineEnd=stripped)
def encodestring(s, quotetabs=False, header=False):
if b2a_qp is not None:
return b2a_qp(s, quotetabs=quotetabs, header=header)
from io import BytesIO
infp = BytesIO(s)
outfp = BytesIO()
encode(infp, outfp, quotetabs, header)
return outfp.getvalue()
def decode(input, output, header=False):
"""Read 'input', apply quoted-printable decoding, and write to 'output'.
'input' and 'output' are binary file objects.
If 'header' is true, decode underscore as space (per RFC 1522)."""
if a2b_qp is not None:
data = input.read()
odata = a2b_qp(data, header=header)
output.write(odata)
return
new = b''
while 1:
line = input.readline()
if not line: break
i, n = 0, len(line)
if n > 0 and line[n-1:n] == b'\n':
partial = 0; n = n-1
# Strip trailing whitespace
while n > 0 and line[n-1:n] in b" \t\r":
n = n-1
else:
partial = 1
while i < n:
c = line[i:i+1]
if c == b'_' and header:
new = new + b' '; i = i+1
elif c != ESCAPE:
new = new + c; i = i+1
elif i+1 == n and not partial:
partial = 1; break
elif i+1 < n and line[i+1:i+2] == ESCAPE:
new = new + ESCAPE; i = i+2
elif i+2 < n and ishex(line[i+1:i+2]) and ishex(line[i+2:i+3]):
new = new + bytes((unhex(line[i+1:i+3]),)); i = i+3
else: # Bad escape sequence -- leave it in
new = new + c; i = i+1
if not partial:
output.write(new + b'\n')
new = b''
if new:
output.write(new)
def decodestring(s, header=False):
if a2b_qp is not None:
return a2b_qp(s, header=header)
from io import BytesIO
infp = BytesIO(s)
outfp = BytesIO()
decode(infp, outfp, header=header)
return outfp.getvalue()
# Other helper functions
def ishex(c):
"""Return true if the byte ordinal 'c' is a hexadecimal digit in ASCII."""
assert isinstance(c, bytes)
return b'0' <= c <= b'9' or b'a' <= c <= b'f' or b'A' <= c <= b'F'
def unhex(s):
"""Get the integer value of a hexadecimal number."""
bits = 0
for c in s:
c = bytes((c,))
if b'0' <= c <= b'9':
i = ord('0')
elif b'a' <= c <= b'f':
i = ord('a')-10
elif b'A' <= c <= b'F':
i = ord(b'A')-10
else:
assert False, "non-hex digit "+repr(c)
bits = bits*16 + (ord(c) - i)
return bits
def main():
import sys
import getopt
try:
opts, args = getopt.getopt(sys.argv[1:], 'td')
except getopt.error as msg:
sys.stdout = sys.stderr
print(msg)
print("usage: quopri [-t | -d] [file] ...")
print("-t: quote tabs")
print("-d: decode; default encode")
sys.exit(2)
deco = 0
tabs = 0
for o, a in opts:
if o == '-t': tabs = 1
if o == '-d': deco = 1
if tabs and deco:
sys.stdout = sys.stderr
print("-t and -d are mutually exclusive")
sys.exit(2)
if not args: args = ['-']
sts = 0
for file in args:
if file == '-':
fp = sys.stdin.buffer
else:
try:
fp = open(file, "rb")
except OSError as msg:
sys.stderr.write("%s: can't open (%s)\n" % (file, msg))
sts = 1
continue
try:
if deco:
decode(fp, sys.stdout.buffer)
else:
encode(fp, sys.stdout.buffer, tabs)
finally:
if file != '-':
fp.close()
if sts:
sys.exit(sts)
if __name__ == '__main__':
main()

1
Lib/sitecustomize.py Symbolic link
View File

@@ -0,0 +1 @@
/etc/python3.6/sitecustomize.py

23
Lib/wsgiref/__init__.py Normal file
View File

@@ -0,0 +1,23 @@
"""wsgiref -- a WSGI (PEP 3333) Reference Library
Current Contents:
* util -- Miscellaneous useful functions and wrappers
* headers -- Manage response headers
* handlers -- base classes for server/gateway implementations
* simple_server -- a simple BaseHTTPServer that supports WSGI
* validate -- validation wrapper that sits between an app and a server
to detect errors in either
To-Do:
* cgi_gateway -- Run WSGI apps under CGI (pending a deployment standard)
* cgi_wrapper -- Run CGI apps under WSGI
* router -- a simple middleware component that handles URL traversal
"""

557
Lib/wsgiref/handlers.py Normal file
View File

@@ -0,0 +1,557 @@
"""Base classes for server/gateway implementations"""
from .util import FileWrapper, guess_scheme, is_hop_by_hop
from .headers import Headers
import sys, os, time
__all__ = [
'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler',
'IISCGIHandler', 'read_environ'
]
# Weekday and month names for HTTP date/time formatting; always English!
_weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
_monthname = [None, # Dummy so we can use 1-based month numbers
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
def format_date_time(timestamp):
year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
_weekdayname[wd], day, _monthname[month], year, hh, mm, ss
)
_is_request = {
'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE',
'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT',
}.__contains__
def _needs_transcode(k):
return _is_request(k) or k.startswith('HTTP_') or k.startswith('SSL_') \
or (k.startswith('REDIRECT_') and _needs_transcode(k[9:]))
def read_environ():
"""Read environment, fixing HTTP variables"""
enc = sys.getfilesystemencoding()
esc = 'surrogateescape'
try:
''.encode('utf-8', esc)
except LookupError:
esc = 'replace'
environ = {}
# Take the basic environment from native-unicode os.environ. Attempt to
# fix up the variables that come from the HTTP request to compensate for
# the bytes->unicode decoding step that will already have taken place.
for k, v in os.environ.items():
if _needs_transcode(k):
# On win32, the os.environ is natively Unicode. Different servers
# decode the request bytes using different encodings.
if sys.platform == 'win32':
software = os.environ.get('SERVER_SOFTWARE', '').lower()
# On IIS, the HTTP request will be decoded as UTF-8 as long
# as the input is a valid UTF-8 sequence. Otherwise it is
# decoded using the system code page (mbcs), with no way to
# detect this has happened. Because UTF-8 is the more likely
# encoding, and mbcs is inherently unreliable (an mbcs string
# that happens to be valid UTF-8 will not be decoded as mbcs)
# always recreate the original bytes as UTF-8.
if software.startswith('microsoft-iis/'):
v = v.encode('utf-8').decode('iso-8859-1')
# Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1) direct
# to the Unicode environ. No modification needed.
elif software.startswith('apache/'):
pass
# Python 3's http.server.CGIHTTPRequestHandler decodes
# using the urllib.unquote default of UTF-8, amongst other
# issues.
elif (
software.startswith('simplehttp/')
and 'python/3' in software
):
v = v.encode('utf-8').decode('iso-8859-1')
# For other servers, guess that they have written bytes to
# the environ using stdio byte-oriented interfaces, ending up
# with the system code page.
else:
v = v.encode(enc, 'replace').decode('iso-8859-1')
# Recover bytes from unicode environ, using surrogate escapes
# where available (Python 3.1+).
else:
v = v.encode(enc, esc).decode('iso-8859-1')
environ[k] = v
return environ
class BaseHandler:
"""Manage the invocation of a WSGI application"""
# Configuration parameters; can override per-subclass or per-instance
wsgi_version = (1,0)
wsgi_multithread = True
wsgi_multiprocess = True
wsgi_run_once = False
origin_server = True # We are transmitting direct to client
http_version = "1.0" # Version that should be used for response
server_software = None # String name of server software, if any
# os_environ is used to supply configuration from the OS environment:
# by default it's a copy of 'os.environ' as of import time, but you can
# override this in e.g. your __init__ method.
os_environ= read_environ()
# Collaborator classes
wsgi_file_wrapper = FileWrapper # set to None to disable
headers_class = Headers # must be a Headers-like class
# Error handling (also per-subclass or per-instance)
traceback_limit = None # Print entire traceback to self.get_stderr()
error_status = "500 Internal Server Error"
error_headers = [('Content-Type','text/plain')]
error_body = b"A server error occurred. Please contact the administrator."
# State variables (don't mess with these)
status = result = None
headers_sent = False
headers = None
bytes_sent = 0
def run(self, application):
"""Invoke the application"""
# Note to self: don't move the close()! Asynchronous servers shouldn't
# call close() from finish_response(), so if you close() anywhere but
# the double-error branch here, you'll break asynchronous servers by
# prematurely closing. Async servers must return from 'run()' without
# closing if there might still be output to iterate over.
try:
self.setup_environ()
self.result = application(self.environ, self.start_response)
self.finish_response()
except:
try:
self.handle_error()
except:
# If we get an error handling an error, just give up already!
self.close()
raise # ...and let the actual server figure it out.
def setup_environ(self):
"""Set up the environment for one request"""
env = self.environ = self.os_environ.copy()
self.add_cgi_vars()
env['wsgi.input'] = self.get_stdin()
env['wsgi.errors'] = self.get_stderr()
env['wsgi.version'] = self.wsgi_version
env['wsgi.run_once'] = self.wsgi_run_once
env['wsgi.url_scheme'] = self.get_scheme()
env['wsgi.multithread'] = self.wsgi_multithread
env['wsgi.multiprocess'] = self.wsgi_multiprocess
if self.wsgi_file_wrapper is not None:
env['wsgi.file_wrapper'] = self.wsgi_file_wrapper
if self.origin_server and self.server_software:
env.setdefault('SERVER_SOFTWARE',self.server_software)
def finish_response(self):
"""Send any iterable data, then close self and the iterable
Subclasses intended for use in asynchronous servers will
want to redefine this method, such that it sets up callbacks
in the event loop to iterate over the data, and to call
'self.close()' once the response is finished.
"""
try:
if not self.result_is_file() or not self.sendfile():
for data in self.result:
self.write(data)
self.finish_content()
finally:
self.close()
def get_scheme(self):
"""Return the URL scheme being used"""
return guess_scheme(self.environ)
def set_content_length(self):
"""Compute Content-Length or switch to chunked encoding if possible"""
try:
blocks = len(self.result)
except (TypeError,AttributeError,NotImplementedError):
pass
else:
if blocks==1:
self.headers['Content-Length'] = str(self.bytes_sent)
return
# XXX Try for chunked encoding if origin server and client is 1.1
def cleanup_headers(self):
"""Make any necessary header changes or defaults
Subclasses can extend this to add other defaults.
"""
if 'Content-Length' not in self.headers:
self.set_content_length()
def start_response(self, status, headers,exc_info=None):
"""'start_response()' callable as specified by PEP 3333"""
if exc_info:
try:
if self.headers_sent:
# Re-raise original exception if headers sent
raise exc_info[0](exc_info[1]).with_traceback(exc_info[2])
finally:
exc_info = None # avoid dangling circular ref
elif self.headers is not None:
raise AssertionError("Headers already set!")
self.status = status
self.headers = self.headers_class(headers)
status = self._convert_string_type(status, "Status")
assert len(status)>=4,"Status must be at least 4 characters"
assert status[:3].isdigit(), "Status message must begin w/3-digit code"
assert status[3]==" ", "Status message must have a space after code"
if __debug__:
for name, val in headers:
name = self._convert_string_type(name, "Header name")
val = self._convert_string_type(val, "Header value")
assert not is_hop_by_hop(name),"Hop-by-hop headers not allowed"
return self.write
def _convert_string_type(self, value, title):
"""Convert/check value type."""
if type(value) is str:
return value
raise AssertionError(
"{0} must be of type str (got {1})".format(title, repr(value))
)
def send_preamble(self):
"""Transmit version/status/date/server, via self._write()"""
if self.origin_server:
if self.client_is_modern():
self._write(('HTTP/%s %s\r\n' % (self.http_version,self.status)).encode('iso-8859-1'))
if 'Date' not in self.headers:
self._write(
('Date: %s\r\n' % format_date_time(time.time())).encode('iso-8859-1')
)
if self.server_software and 'Server' not in self.headers:
self._write(('Server: %s\r\n' % self.server_software).encode('iso-8859-1'))
else:
self._write(('Status: %s\r\n' % self.status).encode('iso-8859-1'))
def write(self, data):
"""'write()' callable as specified by PEP 3333"""
assert type(data) is bytes, \
"write() argument must be a bytes instance"
if not self.status:
raise AssertionError("write() before start_response()")
elif not self.headers_sent:
# Before the first output, send the stored headers
self.bytes_sent = len(data) # make sure we know content-length
self.send_headers()
else:
self.bytes_sent += len(data)
# XXX check Content-Length and truncate if too many bytes written?
self._write(data)
self._flush()
def sendfile(self):
"""Platform-specific file transmission
Override this method in subclasses to support platform-specific
file transmission. It is only called if the application's
return iterable ('self.result') is an instance of
'self.wsgi_file_wrapper'.
This method should return a true value if it was able to actually
transmit the wrapped file-like object using a platform-specific
approach. It should return a false value if normal iteration
should be used instead. An exception can be raised to indicate
that transmission was attempted, but failed.
NOTE: this method should call 'self.send_headers()' if
'self.headers_sent' is false and it is going to attempt direct
transmission of the file.
"""
return False # No platform-specific transmission by default
def finish_content(self):
"""Ensure headers and content have both been sent"""
if not self.headers_sent:
# Only zero Content-Length if not set by the application (so
# that HEAD requests can be satisfied properly, see #3839)
self.headers.setdefault('Content-Length', "0")
self.send_headers()
else:
pass # XXX check if content-length was too short?
def close(self):
"""Close the iterable (if needed) and reset all instance vars
Subclasses may want to also drop the client connection.
"""
try:
if hasattr(self.result,'close'):
self.result.close()
finally:
self.result = self.headers = self.status = self.environ = None
self.bytes_sent = 0; self.headers_sent = False
def send_headers(self):
"""Transmit headers to the client, via self._write()"""
self.cleanup_headers()
self.headers_sent = True
if not self.origin_server or self.client_is_modern():
self.send_preamble()
self._write(bytes(self.headers))
def result_is_file(self):
"""True if 'self.result' is an instance of 'self.wsgi_file_wrapper'"""
wrapper = self.wsgi_file_wrapper
return wrapper is not None and isinstance(self.result,wrapper)
def client_is_modern(self):
"""True if client can accept status and headers"""
return self.environ['SERVER_PROTOCOL'].upper() != 'HTTP/0.9'
def log_exception(self,exc_info):
"""Log the 'exc_info' tuple in the server log
Subclasses may override to retarget the output or change its format.
"""
try:
from traceback import print_exception
stderr = self.get_stderr()
print_exception(
exc_info[0], exc_info[1], exc_info[2],
self.traceback_limit, stderr
)
stderr.flush()
finally:
exc_info = None
def handle_error(self):
"""Log current error, and send error output to client if possible"""
self.log_exception(sys.exc_info())
if not self.headers_sent:
self.result = self.error_output(self.environ, self.start_response)
self.finish_response()
# XXX else: attempt advanced recovery techniques for HTML or text?
def error_output(self, environ, start_response):
"""WSGI mini-app to create error output
By default, this just uses the 'error_status', 'error_headers',
and 'error_body' attributes to generate an output page. It can
be overridden in a subclass to dynamically generate diagnostics,
choose an appropriate message for the user's preferred language, etc.
Note, however, that it's not recommended from a security perspective to
spit out diagnostics to any old user; ideally, you should have to do
something special to enable diagnostic output, which is why we don't
include any here!
"""
start_response(self.error_status,self.error_headers[:],sys.exc_info())
return [self.error_body]
# Pure abstract methods; *must* be overridden in subclasses
def _write(self,data):
"""Override in subclass to buffer data for send to client
It's okay if this method actually transmits the data; BaseHandler
just separates write and flush operations for greater efficiency
when the underlying system actually has such a distinction.
"""
raise NotImplementedError
def _flush(self):
"""Override in subclass to force sending of recent '_write()' calls
It's okay if this method is a no-op (i.e., if '_write()' actually
sends the data.
"""
raise NotImplementedError
def get_stdin(self):
"""Override in subclass to return suitable 'wsgi.input'"""
raise NotImplementedError
def get_stderr(self):
"""Override in subclass to return suitable 'wsgi.errors'"""
raise NotImplementedError
def add_cgi_vars(self):
"""Override in subclass to insert CGI variables in 'self.environ'"""
raise NotImplementedError
class SimpleHandler(BaseHandler):
"""Handler that's just initialized with streams, environment, etc.
This handler subclass is intended for synchronous HTTP/1.0 origin servers,
and handles sending the entire response output, given the correct inputs.
Usage::
handler = SimpleHandler(
inp,out,err,env, multithread=False, multiprocess=True
)
handler.run(app)"""
def __init__(self,stdin,stdout,stderr,environ,
multithread=True, multiprocess=False
):
self.stdin = stdin
self.stdout = stdout
self.stderr = stderr
self.base_env = environ
self.wsgi_multithread = multithread
self.wsgi_multiprocess = multiprocess
def get_stdin(self):
return self.stdin
def get_stderr(self):
return self.stderr
def add_cgi_vars(self):
self.environ.update(self.base_env)
def _write(self,data):
result = self.stdout.write(data)
if result is None or result == len(data):
return
from warnings import warn
warn("SimpleHandler.stdout.write() should not do partial writes",
DeprecationWarning)
while True:
data = data[result:]
if not data:
break
result = self.stdout.write(data)
def _flush(self):
self.stdout.flush()
self._flush = self.stdout.flush
class BaseCGIHandler(SimpleHandler):
"""CGI-like systems using input/output/error streams and environ mapping
Usage::
handler = BaseCGIHandler(inp,out,err,env)
handler.run(app)
This handler class is useful for gateway protocols like ReadyExec and
FastCGI, that have usable input/output/error streams and an environment
mapping. It's also the base class for CGIHandler, which just uses
sys.stdin, os.environ, and so on.
The constructor also takes keyword arguments 'multithread' and
'multiprocess' (defaulting to 'True' and 'False' respectively) to control
the configuration sent to the application. It sets 'origin_server' to
False (to enable CGI-like output), and assumes that 'wsgi.run_once' is
False.
"""
origin_server = False
class CGIHandler(BaseCGIHandler):
"""CGI-based invocation via sys.stdin/stdout/stderr and os.environ
Usage::
CGIHandler().run(app)
The difference between this class and BaseCGIHandler is that it always
uses 'wsgi.run_once' of 'True', 'wsgi.multithread' of 'False', and
'wsgi.multiprocess' of 'True'. It does not take any initialization
parameters, but always uses 'sys.stdin', 'os.environ', and friends.
If you need to override any of these parameters, use BaseCGIHandler
instead.
"""
wsgi_run_once = True
# Do not allow os.environ to leak between requests in Google App Engine
# and other multi-run CGI use cases. This is not easily testable.
# See http://bugs.python.org/issue7250
os_environ = {}
def __init__(self):
BaseCGIHandler.__init__(
self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
read_environ(), multithread=False, multiprocess=True
)
class IISCGIHandler(BaseCGIHandler):
"""CGI-based invocation with workaround for IIS path bug
This handler should be used in preference to CGIHandler when deploying on
Microsoft IIS without having set the config allowPathInfo option (IIS>=7)
or metabase allowPathInfoForScriptMappings (IIS<7).
"""
wsgi_run_once = True
os_environ = {}
# By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at
# the front, causing problems for WSGI applications that wish to implement
# routing. This handler strips any such duplicated path.
# IIS can be configured to pass the correct PATH_INFO, but this causes
# another bug where PATH_TRANSLATED is wrong. Luckily this variable is
# rarely used and is not guaranteed by WSGI. On IIS<7, though, the
# setting can only be made on a vhost level, affecting all other script
# mappings, many of which break when exposed to the PATH_TRANSLATED bug.
# For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
# rarely uses it because there is still no UI for it.)
# There is no way for CGI code to tell whether the option was set, so a
# separate handler class is provided.
def __init__(self):
environ= read_environ()
path = environ.get('PATH_INFO', '')
script = environ.get('SCRIPT_NAME', '')
if (path+'/').startswith(script+'/'):
environ['PATH_INFO'] = path[len(script):]
BaseCGIHandler.__init__(
self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
environ, multithread=False, multiprocess=True
)

184
Lib/wsgiref/headers.py Normal file
View File

@@ -0,0 +1,184 @@
"""Manage HTTP Response Headers
Much of this module is red-handedly pilfered from email.message in the stdlib,
so portions are Copyright (C) 2001,2002 Python Software Foundation, and were
written by Barry Warsaw.
"""
# Regular expression that matches `special' characters in parameters, the
# existence of which force quoting of the parameter value.
import re
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
def _formatparam(param, value=None, quote=1):
"""Convenience function to format and return a key=value pair.
This will quote the value if needed or if quote is true.
"""
if value is not None and len(value) > 0:
if quote or tspecials.search(value):
value = value.replace('\\', '\\\\').replace('"', r'\"')
return '%s="%s"' % (param, value)
else:
return '%s=%s' % (param, value)
else:
return param
class Headers:
"""Manage a collection of HTTP response headers"""
def __init__(self, headers=None):
headers = headers if headers is not None else []
if type(headers) is not list:
raise TypeError("Headers must be a list of name/value tuples")
self._headers = headers
if __debug__:
for k, v in headers:
self._convert_string_type(k)
self._convert_string_type(v)
def _convert_string_type(self, value):
"""Convert/check value type."""
if type(value) is str:
return value
raise AssertionError("Header names/values must be"
" of type str (got {0})".format(repr(value)))
def __len__(self):
"""Return the total number of headers, including duplicates."""
return len(self._headers)
def __setitem__(self, name, val):
"""Set the value of a header."""
del self[name]
self._headers.append(
(self._convert_string_type(name), self._convert_string_type(val)))
def __delitem__(self,name):
"""Delete all occurrences of a header, if present.
Does *not* raise an exception if the header is missing.
"""
name = self._convert_string_type(name.lower())
self._headers[:] = [kv for kv in self._headers if kv[0].lower() != name]
def __getitem__(self,name):
"""Get the first header value for 'name'
Return None if the header is missing instead of raising an exception.
Note that if the header appeared multiple times, the first exactly which
occurrence gets returned is undefined. Use getall() to get all
the values matching a header field name.
"""
return self.get(name)
def __contains__(self, name):
"""Return true if the message contains the header."""
return self.get(name) is not None
def get_all(self, name):
"""Return a list of all the values for the named field.
These will be sorted in the order they appeared in the original header
list or were added to this instance, and may contain duplicates. Any
fields deleted and re-inserted are always appended to the header list.
If no fields exist with the given name, returns an empty list.
"""
name = self._convert_string_type(name.lower())
return [kv[1] for kv in self._headers if kv[0].lower()==name]
def get(self,name,default=None):
"""Get the first header value for 'name', or return 'default'"""
name = self._convert_string_type(name.lower())
for k,v in self._headers:
if k.lower()==name:
return v
return default
def keys(self):
"""Return a list of all the header field names.
These will be sorted in the order they appeared in the original header
list, or were added to this instance, and may contain duplicates.
Any fields deleted and re-inserted are always appended to the header
list.
"""
return [k for k, v in self._headers]
def values(self):
"""Return a list of all header values.
These will be sorted in the order they appeared in the original header
list, or were added to this instance, and may contain duplicates.
Any fields deleted and re-inserted are always appended to the header
list.
"""
return [v for k, v in self._headers]
def items(self):
"""Get all the header fields and values.
These will be sorted in the order they were in the original header
list, or were added to this instance, and may contain duplicates.
Any fields deleted and re-inserted are always appended to the header
list.
"""
return self._headers[:]
def __repr__(self):
return "%s(%r)" % (self.__class__.__name__, self._headers)
def __str__(self):
"""str() returns the formatted headers, complete with end line,
suitable for direct HTTP transmission."""
return '\r\n'.join(["%s: %s" % kv for kv in self._headers]+['',''])
def __bytes__(self):
return str(self).encode('iso-8859-1')
def setdefault(self,name,value):
"""Return first matching header value for 'name', or 'value'
If there is no header named 'name', add a new header with name 'name'
and value 'value'."""
result = self.get(name)
if result is None:
self._headers.append((self._convert_string_type(name),
self._convert_string_type(value)))
return value
else:
return result
def add_header(self, _name, _value, **_params):
"""Extended header setting.
_name is the header field to add. keyword arguments can be used to set
additional parameters for the header field, with underscores converted
to dashes. Normally the parameter will be added as key="value" unless
value is None, in which case only the key will be added.
Example:
h.add_header('content-disposition', 'attachment', filename='bud.gif')
Note that unlike the corresponding 'email.message' method, this does
*not* handle '(charset, language, value)' tuples: all values must be
strings or None.
"""
parts = []
if _value is not None:
_value = self._convert_string_type(_value)
parts.append(_value)
for k, v in _params.items():
k = self._convert_string_type(k)
if v is None:
parts.append(k.replace('_', '-'))
else:
v = self._convert_string_type(v)
parts.append(_formatparam(k.replace('_', '-'), v))
self._headers.append((self._convert_string_type(_name), "; ".join(parts)))

View File

@@ -0,0 +1,164 @@
"""BaseHTTPServer that implements the Python WSGI protocol (PEP 3333)
This is both an example of how WSGI can be implemented, and a basis for running
simple web applications on a local machine, such as might be done when testing
or debugging an application. It has not been reviewed for security issues,
however, and we strongly recommend that you use a "real" web server for
production use.
For example usage, see the 'if __name__=="__main__"' block at the end of the
module. See also the BaseHTTPServer module docs for other API information.
"""
from http.server import BaseHTTPRequestHandler, HTTPServer
import sys
import urllib.parse
from wsgiref.handlers import SimpleHandler
from platform import python_implementation
__version__ = "0.2"
__all__ = ['WSGIServer', 'WSGIRequestHandler', 'demo_app', 'make_server']
server_version = "WSGIServer/" + __version__
sys_version = python_implementation() + "/" + sys.version.split()[0]
software_version = server_version + ' ' + sys_version
class ServerHandler(SimpleHandler):
server_software = software_version
def close(self):
try:
self.request_handler.log_request(
self.status.split(' ',1)[0], self.bytes_sent
)
finally:
SimpleHandler.close(self)
class WSGIServer(HTTPServer):
"""BaseHTTPServer that implements the Python WSGI protocol"""
application = None
def server_bind(self):
"""Override server_bind to store the server name."""
HTTPServer.server_bind(self)
self.setup_environ()
def setup_environ(self):
# Set up base environment
env = self.base_environ = {}
env['SERVER_NAME'] = self.server_name
env['GATEWAY_INTERFACE'] = 'CGI/1.1'
env['SERVER_PORT'] = str(self.server_port)
env['REMOTE_HOST']=''
env['CONTENT_LENGTH']=''
env['SCRIPT_NAME'] = ''
def get_app(self):
return self.application
def set_app(self,application):
self.application = application
class WSGIRequestHandler(BaseHTTPRequestHandler):
server_version = "WSGIServer/" + __version__
def get_environ(self):
env = self.server.base_environ.copy()
env['SERVER_PROTOCOL'] = self.request_version
env['SERVER_SOFTWARE'] = self.server_version
env['REQUEST_METHOD'] = self.command
if '?' in self.path:
path,query = self.path.split('?',1)
else:
path,query = self.path,''
env['PATH_INFO'] = urllib.parse.unquote(path, 'iso-8859-1')
env['QUERY_STRING'] = query
host = self.address_string()
if host != self.client_address[0]:
env['REMOTE_HOST'] = host
env['REMOTE_ADDR'] = self.client_address[0]
if self.headers.get('content-type') is None:
env['CONTENT_TYPE'] = self.headers.get_content_type()
else:
env['CONTENT_TYPE'] = self.headers['content-type']
length = self.headers.get('content-length')
if length:
env['CONTENT_LENGTH'] = length
for k, v in self.headers.items():
k=k.replace('-','_').upper(); v=v.strip()
if k in env:
continue # skip content length, type,etc.
if 'HTTP_'+k in env:
env['HTTP_'+k] += ','+v # comma-separate multiple headers
else:
env['HTTP_'+k] = v
return env
def get_stderr(self):
return sys.stderr
def handle(self):
"""Handle a single HTTP request"""
self.raw_requestline = self.rfile.readline(65537)
if len(self.raw_requestline) > 65536:
self.requestline = ''
self.request_version = ''
self.command = ''
self.send_error(414)
return
if not self.parse_request(): # An error code has been sent, just exit
return
handler = ServerHandler(
self.rfile, self.wfile, self.get_stderr(), self.get_environ()
)
handler.request_handler = self # backpointer for logging
handler.run(self.server.get_app())
def demo_app(environ,start_response):
from io import StringIO
stdout = StringIO()
print("Hello world!", file=stdout)
print(file=stdout)
h = sorted(environ.items())
for k,v in h:
print(k,'=',repr(v), file=stdout)
start_response("200 OK", [('Content-Type','text/plain; charset=utf-8')])
return [stdout.getvalue().encode("utf-8")]
def make_server(
host, port, app, server_class=WSGIServer, handler_class=WSGIRequestHandler
):
"""Create a new WSGI server listening on `host` and `port` for `app`"""
server = server_class((host, port), handler_class)
server.set_app(app)
return server
if __name__ == '__main__':
with make_server('', 8000, demo_app) as httpd:
sa = httpd.socket.getsockname()
print("Serving HTTP on", sa[0], "port", sa[1], "...")
import webbrowser
webbrowser.open('http://localhost:8000/xyz?abc')
httpd.handle_request() # serve one request, then exit

165
Lib/wsgiref/util.py Normal file
View File

@@ -0,0 +1,165 @@
"""Miscellaneous WSGI-related Utilities"""
import posixpath
__all__ = [
'FileWrapper', 'guess_scheme', 'application_uri', 'request_uri',
'shift_path_info', 'setup_testing_defaults',
]
class FileWrapper:
"""Wrapper to convert file-like objects to iterables"""
def __init__(self, filelike, blksize=8192):
self.filelike = filelike
self.blksize = blksize
if hasattr(filelike,'close'):
self.close = filelike.close
def __getitem__(self,key):
data = self.filelike.read(self.blksize)
if data:
return data
raise IndexError
def __iter__(self):
return self
def __next__(self):
data = self.filelike.read(self.blksize)
if data:
return data
raise StopIteration
def guess_scheme(environ):
"""Return a guess for whether 'wsgi.url_scheme' should be 'http' or 'https'
"""
if environ.get("HTTPS") in ('yes','on','1'):
return 'https'
else:
return 'http'
def application_uri(environ):
"""Return the application's base URI (no PATH_INFO or QUERY_STRING)"""
url = environ['wsgi.url_scheme']+'://'
from urllib.parse import quote
if environ.get('HTTP_HOST'):
url += environ['HTTP_HOST']
else:
url += environ['SERVER_NAME']
if environ['wsgi.url_scheme'] == 'https':
if environ['SERVER_PORT'] != '443':
url += ':' + environ['SERVER_PORT']
else:
if environ['SERVER_PORT'] != '80':
url += ':' + environ['SERVER_PORT']
url += quote(environ.get('SCRIPT_NAME') or '/', encoding='latin1')
return url
def request_uri(environ, include_query=True):
"""Return the full request URI, optionally including the query string"""
url = application_uri(environ)
from urllib.parse import quote
path_info = quote(environ.get('PATH_INFO',''), safe='/;=,', encoding='latin1')
if not environ.get('SCRIPT_NAME'):
url += path_info[1:]
else:
url += path_info
if include_query and environ.get('QUERY_STRING'):
url += '?' + environ['QUERY_STRING']
return url
def shift_path_info(environ):
"""Shift a name from PATH_INFO to SCRIPT_NAME, returning it
If there are no remaining path segments in PATH_INFO, return None.
Note: 'environ' is modified in-place; use a copy if you need to keep
the original PATH_INFO or SCRIPT_NAME.
Note: when PATH_INFO is just a '/', this returns '' and appends a trailing
'/' to SCRIPT_NAME, even though empty path segments are normally ignored,
and SCRIPT_NAME doesn't normally end in a '/'. This is intentional
behavior, to ensure that an application can tell the difference between
'/x' and '/x/' when traversing to objects.
"""
path_info = environ.get('PATH_INFO','')
if not path_info:
return None
path_parts = path_info.split('/')
path_parts[1:-1] = [p for p in path_parts[1:-1] if p and p != '.']
name = path_parts[1]
del path_parts[1]
script_name = environ.get('SCRIPT_NAME','')
script_name = posixpath.normpath(script_name+'/'+name)
if script_name.endswith('/'):
script_name = script_name[:-1]
if not name and not script_name.endswith('/'):
script_name += '/'
environ['SCRIPT_NAME'] = script_name
environ['PATH_INFO'] = '/'.join(path_parts)
# Special case: '/.' on PATH_INFO doesn't get stripped,
# because we don't strip the last element of PATH_INFO
# if there's only one path part left. Instead of fixing this
# above, we fix it here so that PATH_INFO gets normalized to
# an empty string in the environ.
if name=='.':
name = None
return name
def setup_testing_defaults(environ):
"""Update 'environ' with trivial defaults for testing purposes
This adds various parameters required for WSGI, including HTTP_HOST,
SERVER_NAME, SERVER_PORT, REQUEST_METHOD, SCRIPT_NAME, PATH_INFO,
and all of the wsgi.* variables. It only supplies default values,
and does not replace any existing settings for these variables.
This routine is intended to make it easier for unit tests of WSGI
servers and applications to set up dummy environments. It should *not*
be used by actual WSGI servers or applications, since the data is fake!
"""
environ.setdefault('SERVER_NAME','127.0.0.1')
environ.setdefault('SERVER_PROTOCOL','HTTP/1.0')
environ.setdefault('HTTP_HOST',environ['SERVER_NAME'])
environ.setdefault('REQUEST_METHOD','GET')
if 'SCRIPT_NAME' not in environ and 'PATH_INFO' not in environ:
environ.setdefault('SCRIPT_NAME','')
environ.setdefault('PATH_INFO','/')
environ.setdefault('wsgi.version', (1,0))
environ.setdefault('wsgi.run_once', 0)
environ.setdefault('wsgi.multithread', 0)
environ.setdefault('wsgi.multiprocess', 0)
from io import StringIO, BytesIO
environ.setdefault('wsgi.input', BytesIO())
environ.setdefault('wsgi.errors', StringIO())
environ.setdefault('wsgi.url_scheme',guess_scheme(environ))
if environ['wsgi.url_scheme']=='http':
environ.setdefault('SERVER_PORT', '80')
elif environ['wsgi.url_scheme']=='https':
environ.setdefault('SERVER_PORT', '443')
_hoppish = {
'connection':1, 'keep-alive':1, 'proxy-authenticate':1,
'proxy-authorization':1, 'te':1, 'trailers':1, 'transfer-encoding':1,
'upgrade':1
}.__contains__
def is_hop_by_hop(header_name):
"""Return true if 'header_name' is an HTTP/1.1 "Hop-by-Hop" header"""
return _hoppish(header_name.lower())

443
Lib/wsgiref/validate.py Normal file
View File

@@ -0,0 +1,443 @@
# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
# Also licenced under the Apache License, 2.0: http://opensource.org/licenses/apache2.0.php
# Licensed to PSF under a Contributor Agreement
"""
Middleware to check for obedience to the WSGI specification.
Some of the things this checks:
* Signature of the application and start_response (including that
keyword arguments are not used).
* Environment checks:
- Environment is a dictionary (and not a subclass).
- That all the required keys are in the environment: REQUEST_METHOD,
SERVER_NAME, SERVER_PORT, wsgi.version, wsgi.input, wsgi.errors,
wsgi.multithread, wsgi.multiprocess, wsgi.run_once
- That HTTP_CONTENT_TYPE and HTTP_CONTENT_LENGTH are not in the
environment (these headers should appear as CONTENT_LENGTH and
CONTENT_TYPE).
- Warns if QUERY_STRING is missing, as the cgi module acts
unpredictably in that case.
- That CGI-style variables (that don't contain a .) have
(non-unicode) string values
- That wsgi.version is a tuple
- That wsgi.url_scheme is 'http' or 'https' (@@: is this too
restrictive?)
- Warns if the REQUEST_METHOD is not known (@@: probably too
restrictive).
- That SCRIPT_NAME and PATH_INFO are empty or start with /
- That at least one of SCRIPT_NAME or PATH_INFO are set.
- That CONTENT_LENGTH is a positive integer.
- That SCRIPT_NAME is not '/' (it should be '', and PATH_INFO should
be '/').
- That wsgi.input has the methods read, readline, readlines, and
__iter__
- That wsgi.errors has the methods flush, write, writelines
* The status is a string, contains a space, starts with an integer,
and that integer is in range (> 100).
* That the headers is a list (not a subclass, not another kind of
sequence).
* That the items of the headers are tuples of strings.
* That there is no 'status' header (that is used in CGI, but not in
WSGI).
* That the headers don't contain newlines or colons, end in _ or -, or
contain characters codes below 037.
* That Content-Type is given if there is content (CGI often has a
default content type, but WSGI does not).
* That no Content-Type is given when there is no content (@@: is this
too restrictive?)
* That the exc_info argument to start_response is a tuple or None.
* That all calls to the writer are with strings, and no other methods
on the writer are accessed.
* That wsgi.input is used properly:
- .read() is called with zero or one argument
- That it returns a string
- That readline, readlines, and __iter__ return strings
- That .close() is not called
- No other methods are provided
* That wsgi.errors is used properly:
- .write() and .writelines() is called with a string
- That .close() is not called, and no other methods are provided.
* The response iterator:
- That it is not a string (it should be a list of a single string; a
string will work, but perform horribly).
- That .__next__() returns a string
- That the iterator is not iterated over until start_response has
been called (that can signal either a server or application
error).
- That .close() is called (doesn't raise exception, only prints to
sys.stderr, because we only know it isn't called when the object
is garbage collected).
"""
__all__ = ['validator']
import re
import sys
import warnings
header_re = re.compile(r'^[a-zA-Z][a-zA-Z0-9\-_]*$')
bad_header_value_re = re.compile(r'[\000-\037]')
class WSGIWarning(Warning):
"""
Raised in response to WSGI-spec-related warnings
"""
def assert_(cond, *args):
if not cond:
raise AssertionError(*args)
def check_string_type(value, title):
if type (value) is str:
return value
raise AssertionError(
"{0} must be of type str (got {1})".format(title, repr(value)))
def validator(application):
"""
When applied between a WSGI server and a WSGI application, this
middleware will check for WSGI compliancy on a number of levels.
This middleware does not modify the request or response in any
way, but will raise an AssertionError if anything seems off
(except for a failure to close the application iterator, which
will be printed to stderr -- there's no way to raise an exception
at that point).
"""
def lint_app(*args, **kw):
assert_(len(args) == 2, "Two arguments required")
assert_(not kw, "No keyword arguments allowed")
environ, start_response = args
check_environ(environ)
# We use this to check if the application returns without
# calling start_response:
start_response_started = []
def start_response_wrapper(*args, **kw):
assert_(len(args) == 2 or len(args) == 3, (
"Invalid number of arguments: %s" % (args,)))
assert_(not kw, "No keyword arguments allowed")
status = args[0]
headers = args[1]
if len(args) == 3:
exc_info = args[2]
else:
exc_info = None
check_status(status)
check_headers(headers)
check_content_type(status, headers)
check_exc_info(exc_info)
start_response_started.append(None)
return WriteWrapper(start_response(*args))
environ['wsgi.input'] = InputWrapper(environ['wsgi.input'])
environ['wsgi.errors'] = ErrorWrapper(environ['wsgi.errors'])
iterator = application(environ, start_response_wrapper)
assert_(iterator is not None and iterator != False,
"The application must return an iterator, if only an empty list")
check_iterator(iterator)
return IteratorWrapper(iterator, start_response_started)
return lint_app
class InputWrapper:
def __init__(self, wsgi_input):
self.input = wsgi_input
def read(self, *args):
assert_(len(args) == 1)
v = self.input.read(*args)
assert_(type(v) is bytes)
return v
def readline(self, *args):
assert_(len(args) <= 1)
v = self.input.readline(*args)
assert_(type(v) is bytes)
return v
def readlines(self, *args):
assert_(len(args) <= 1)
lines = self.input.readlines(*args)
assert_(type(lines) is list)
for line in lines:
assert_(type(line) is bytes)
return lines
def __iter__(self):
while 1:
line = self.readline()
if not line:
return
yield line
def close(self):
assert_(0, "input.close() must not be called")
class ErrorWrapper:
def __init__(self, wsgi_errors):
self.errors = wsgi_errors
def write(self, s):
assert_(type(s) is str)
self.errors.write(s)
def flush(self):
self.errors.flush()
def writelines(self, seq):
for line in seq:
self.write(line)
def close(self):
assert_(0, "errors.close() must not be called")
class WriteWrapper:
def __init__(self, wsgi_writer):
self.writer = wsgi_writer
def __call__(self, s):
assert_(type(s) is bytes)
self.writer(s)
class PartialIteratorWrapper:
def __init__(self, wsgi_iterator):
self.iterator = wsgi_iterator
def __iter__(self):
# We want to make sure __iter__ is called
return IteratorWrapper(self.iterator, None)
class IteratorWrapper:
def __init__(self, wsgi_iterator, check_start_response):
self.original_iterator = wsgi_iterator
self.iterator = iter(wsgi_iterator)
self.closed = False
self.check_start_response = check_start_response
def __iter__(self):
return self
def __next__(self):
assert_(not self.closed,
"Iterator read after closed")
v = next(self.iterator)
if type(v) is not bytes:
assert_(False, "Iterator yielded non-bytestring (%r)" % (v,))
if self.check_start_response is not None:
assert_(self.check_start_response,
"The application returns and we started iterating over its body, but start_response has not yet been called")
self.check_start_response = None
return v
def close(self):
self.closed = True
if hasattr(self.original_iterator, 'close'):
self.original_iterator.close()
def __del__(self):
if not self.closed:
sys.stderr.write(
"Iterator garbage collected without being closed")
assert_(self.closed,
"Iterator garbage collected without being closed")
def check_environ(environ):
assert_(type(environ) is dict,
"Environment is not of the right type: %r (environment: %r)"
% (type(environ), environ))
for key in ['REQUEST_METHOD', 'SERVER_NAME', 'SERVER_PORT',
'wsgi.version', 'wsgi.input', 'wsgi.errors',
'wsgi.multithread', 'wsgi.multiprocess',
'wsgi.run_once']:
assert_(key in environ,
"Environment missing required key: %r" % (key,))
for key in ['HTTP_CONTENT_TYPE', 'HTTP_CONTENT_LENGTH']:
assert_(key not in environ,
"Environment should not have the key: %s "
"(use %s instead)" % (key, key[5:]))
if 'QUERY_STRING' not in environ:
warnings.warn(
'QUERY_STRING is not in the WSGI environment; the cgi '
'module will use sys.argv when this variable is missing, '
'so application errors are more likely',
WSGIWarning)
for key in environ.keys():
if '.' in key:
# Extension, we don't care about its type
continue
assert_(type(environ[key]) is str,
"Environmental variable %s is not a string: %r (value: %r)"
% (key, type(environ[key]), environ[key]))
assert_(type(environ['wsgi.version']) is tuple,
"wsgi.version should be a tuple (%r)" % (environ['wsgi.version'],))
assert_(environ['wsgi.url_scheme'] in ('http', 'https'),
"wsgi.url_scheme unknown: %r" % environ['wsgi.url_scheme'])
check_input(environ['wsgi.input'])
check_errors(environ['wsgi.errors'])
# @@: these need filling out:
if environ['REQUEST_METHOD'] not in (
'GET', 'HEAD', 'POST', 'OPTIONS', 'PATCH', 'PUT', 'DELETE', 'TRACE'):
warnings.warn(
"Unknown REQUEST_METHOD: %r" % environ['REQUEST_METHOD'],
WSGIWarning)
assert_(not environ.get('SCRIPT_NAME')
or environ['SCRIPT_NAME'].startswith('/'),
"SCRIPT_NAME doesn't start with /: %r" % environ['SCRIPT_NAME'])
assert_(not environ.get('PATH_INFO')
or environ['PATH_INFO'].startswith('/'),
"PATH_INFO doesn't start with /: %r" % environ['PATH_INFO'])
if environ.get('CONTENT_LENGTH'):
assert_(int(environ['CONTENT_LENGTH']) >= 0,
"Invalid CONTENT_LENGTH: %r" % environ['CONTENT_LENGTH'])
if not environ.get('SCRIPT_NAME'):
assert_('PATH_INFO' in environ,
"One of SCRIPT_NAME or PATH_INFO are required (PATH_INFO "
"should at least be '/' if SCRIPT_NAME is empty)")
assert_(environ.get('SCRIPT_NAME') != '/',
"SCRIPT_NAME cannot be '/'; it should instead be '', and "
"PATH_INFO should be '/'")
def check_input(wsgi_input):
for attr in ['read', 'readline', 'readlines', '__iter__']:
assert_(hasattr(wsgi_input, attr),
"wsgi.input (%r) doesn't have the attribute %s"
% (wsgi_input, attr))
def check_errors(wsgi_errors):
for attr in ['flush', 'write', 'writelines']:
assert_(hasattr(wsgi_errors, attr),
"wsgi.errors (%r) doesn't have the attribute %s"
% (wsgi_errors, attr))
def check_status(status):
status = check_string_type(status, "Status")
# Implicitly check that we can turn it into an integer:
status_code = status.split(None, 1)[0]
assert_(len(status_code) == 3,
"Status codes must be three characters: %r" % status_code)
status_int = int(status_code)
assert_(status_int >= 100, "Status code is invalid: %r" % status_int)
if len(status) < 4 or status[3] != ' ':
warnings.warn(
"The status string (%r) should be a three-digit integer "
"followed by a single space and a status explanation"
% status, WSGIWarning)
def check_headers(headers):
assert_(type(headers) is list,
"Headers (%r) must be of type list: %r"
% (headers, type(headers)))
header_names = {}
for item in headers:
assert_(type(item) is tuple,
"Individual headers (%r) must be of type tuple: %r"
% (item, type(item)))
assert_(len(item) == 2)
name, value = item
name = check_string_type(name, "Header name")
value = check_string_type(value, "Header value")
assert_(name.lower() != 'status',
"The Status header cannot be used; it conflicts with CGI "
"script, and HTTP status is not given through headers "
"(value: %r)." % value)
header_names[name.lower()] = None
assert_('\n' not in name and ':' not in name,
"Header names may not contain ':' or '\\n': %r" % name)
assert_(header_re.search(name), "Bad header name: %r" % name)
assert_(not name.endswith('-') and not name.endswith('_'),
"Names may not end in '-' or '_': %r" % name)
if bad_header_value_re.search(value):
assert_(0, "Bad header value: %r (bad char: %r)"
% (value, bad_header_value_re.search(value).group(0)))
def check_content_type(status, headers):
status = check_string_type(status, "Status")
code = int(status.split(None, 1)[0])
# @@: need one more person to verify this interpretation of RFC 2616
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
NO_MESSAGE_BODY = (204, 304)
for name, value in headers:
name = check_string_type(name, "Header name")
if name.lower() == 'content-type':
if code not in NO_MESSAGE_BODY:
return
assert_(0, ("Content-Type header found in a %s response, "
"which must not return content.") % code)
if code not in NO_MESSAGE_BODY:
assert_(0, "No Content-Type header found in headers (%s)" % headers)
def check_exc_info(exc_info):
assert_(exc_info is None or type(exc_info) is tuple,
"exc_info (%r) is not a tuple: %r" % (exc_info, type(exc_info)))
# More exc_info checks?
def check_iterator(iterator):
# Technically a bytestring is legal, which is why it's a really bad
# idea, because it may cause the response to be returned
# character-by-character
assert_(not isinstance(iterator, (str, bytes)),
"You should not return a string as your application iterator, "
"instead return a single-item list containing a bytestring.")

20
Lib/xml/__init__.py Normal file
View File

@@ -0,0 +1,20 @@
"""Core XML support for Python.
This package contains four sub-packages:
dom -- The W3C Document Object Model. This supports DOM Level 1 +
Namespaces.
parsers -- Python wrappers for XML parsers (currently only supports Expat).
sax -- The Simple API for XML, developed by XML-Dev, led by David
Megginson and ported to Python by Lars Marius Garshol. This
supports the SAX 2 API.
etree -- The ElementTree XML library. This is a subset of the full
ElementTree XML release.
"""
__all__ = ["dom", "parsers", "sax", "etree"]

27
Lib/xml/dom/NodeFilter.py Normal file
View File

@@ -0,0 +1,27 @@
# This is the Python mapping for interface NodeFilter from
# DOM2-Traversal-Range. It contains only constants.
class NodeFilter:
"""
This is the DOM2 NodeFilter interface. It contains only constants.
"""
FILTER_ACCEPT = 1
FILTER_REJECT = 2
FILTER_SKIP = 3
SHOW_ALL = 0xFFFFFFFF
SHOW_ELEMENT = 0x00000001
SHOW_ATTRIBUTE = 0x00000002
SHOW_TEXT = 0x00000004
SHOW_CDATA_SECTION = 0x00000008
SHOW_ENTITY_REFERENCE = 0x00000010
SHOW_ENTITY = 0x00000020
SHOW_PROCESSING_INSTRUCTION = 0x00000040
SHOW_COMMENT = 0x00000080
SHOW_DOCUMENT = 0x00000100
SHOW_DOCUMENT_TYPE = 0x00000200
SHOW_DOCUMENT_FRAGMENT = 0x00000400
SHOW_NOTATION = 0x00000800
def acceptNode(self, node):
raise NotImplementedError

140
Lib/xml/dom/__init__.py Normal file
View File

@@ -0,0 +1,140 @@
"""W3C Document Object Model implementation for Python.
The Python mapping of the Document Object Model is documented in the
Python Library Reference in the section on the xml.dom package.
This package contains the following modules:
minidom -- A simple implementation of the Level 1 DOM with namespace
support added (based on the Level 2 specification) and other
minor Level 2 functionality.
pulldom -- DOM builder supporting on-demand tree-building for selected
subtrees of the document.
"""
class Node:
"""Class giving the NodeType constants."""
__slots__ = ()
# DOM implementations may use this as a base class for their own
# Node implementations. If they don't, the constants defined here
# should still be used as the canonical definitions as they match
# the values given in the W3C recommendation. Client code can
# safely refer to these values in all tests of Node.nodeType
# values.
ELEMENT_NODE = 1
ATTRIBUTE_NODE = 2
TEXT_NODE = 3
CDATA_SECTION_NODE = 4
ENTITY_REFERENCE_NODE = 5
ENTITY_NODE = 6
PROCESSING_INSTRUCTION_NODE = 7
COMMENT_NODE = 8
DOCUMENT_NODE = 9
DOCUMENT_TYPE_NODE = 10
DOCUMENT_FRAGMENT_NODE = 11
NOTATION_NODE = 12
#ExceptionCode
INDEX_SIZE_ERR = 1
DOMSTRING_SIZE_ERR = 2
HIERARCHY_REQUEST_ERR = 3
WRONG_DOCUMENT_ERR = 4
INVALID_CHARACTER_ERR = 5
NO_DATA_ALLOWED_ERR = 6
NO_MODIFICATION_ALLOWED_ERR = 7
NOT_FOUND_ERR = 8
NOT_SUPPORTED_ERR = 9
INUSE_ATTRIBUTE_ERR = 10
INVALID_STATE_ERR = 11
SYNTAX_ERR = 12
INVALID_MODIFICATION_ERR = 13
NAMESPACE_ERR = 14
INVALID_ACCESS_ERR = 15
VALIDATION_ERR = 16
class DOMException(Exception):
"""Abstract base class for DOM exceptions.
Exceptions with specific codes are specializations of this class."""
def __init__(self, *args, **kw):
if self.__class__ is DOMException:
raise RuntimeError(
"DOMException should not be instantiated directly")
Exception.__init__(self, *args, **kw)
def _get_code(self):
return self.code
class IndexSizeErr(DOMException):
code = INDEX_SIZE_ERR
class DomstringSizeErr(DOMException):
code = DOMSTRING_SIZE_ERR
class HierarchyRequestErr(DOMException):
code = HIERARCHY_REQUEST_ERR
class WrongDocumentErr(DOMException):
code = WRONG_DOCUMENT_ERR
class InvalidCharacterErr(DOMException):
code = INVALID_CHARACTER_ERR
class NoDataAllowedErr(DOMException):
code = NO_DATA_ALLOWED_ERR
class NoModificationAllowedErr(DOMException):
code = NO_MODIFICATION_ALLOWED_ERR
class NotFoundErr(DOMException):
code = NOT_FOUND_ERR
class NotSupportedErr(DOMException):
code = NOT_SUPPORTED_ERR
class InuseAttributeErr(DOMException):
code = INUSE_ATTRIBUTE_ERR
class InvalidStateErr(DOMException):
code = INVALID_STATE_ERR
class SyntaxErr(DOMException):
code = SYNTAX_ERR
class InvalidModificationErr(DOMException):
code = INVALID_MODIFICATION_ERR
class NamespaceErr(DOMException):
code = NAMESPACE_ERR
class InvalidAccessErr(DOMException):
code = INVALID_ACCESS_ERR
class ValidationErr(DOMException):
code = VALIDATION_ERR
class UserDataHandler:
"""Class giving the operation constants for UserDataHandler.handle()."""
# Based on DOM Level 3 (WD 9 April 2002)
NODE_CLONED = 1
NODE_IMPORTED = 2
NODE_DELETED = 3
NODE_RENAMED = 4
XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"
XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"
EMPTY_NAMESPACE = None
EMPTY_PREFIX = None
from .domreg import getDOMImplementation, registerDOMImplementation

99
Lib/xml/dom/domreg.py Normal file
View File

@@ -0,0 +1,99 @@
"""Registration facilities for DOM. This module should not be used
directly. Instead, the functions getDOMImplementation and
registerDOMImplementation should be imported from xml.dom."""
# This is a list of well-known implementations. Well-known names
# should be published by posting to xml-sig@python.org, and are
# subsequently recorded in this file.
import sys
well_known_implementations = {
'minidom':'xml.dom.minidom',
'4DOM': 'xml.dom.DOMImplementation',
}
# DOM implementations not officially registered should register
# themselves with their
registered = {}
def registerDOMImplementation(name, factory):
"""registerDOMImplementation(name, factory)
Register the factory function with the name. The factory function
should return an object which implements the DOMImplementation
interface. The factory function can either return the same object,
or a new one (e.g. if that implementation supports some
customization)."""
registered[name] = factory
def _good_enough(dom, features):
"_good_enough(dom, features) -> Return 1 if the dom offers the features"
for f,v in features:
if not dom.hasFeature(f,v):
return 0
return 1
def getDOMImplementation(name=None, features=()):
"""getDOMImplementation(name = None, features = ()) -> DOM implementation.
Return a suitable DOM implementation. The name is either
well-known, the module name of a DOM implementation, or None. If
it is not None, imports the corresponding module and returns
DOMImplementation object if the import succeeds.
If name is not given, consider the available implementations to
find one with the required feature set. If no implementation can
be found, raise an ImportError. The features list must be a sequence
of (feature, version) pairs which are passed to hasFeature."""
import os
creator = None
mod = well_known_implementations.get(name)
if mod:
mod = __import__(mod, {}, {}, ['getDOMImplementation'])
return mod.getDOMImplementation()
elif name:
return registered[name]()
elif not sys.flags.ignore_environment and "PYTHON_DOM" in os.environ:
return getDOMImplementation(name = os.environ["PYTHON_DOM"])
# User did not specify a name, try implementations in arbitrary
# order, returning the one that has the required features
if isinstance(features, str):
features = _parse_feature_string(features)
for creator in registered.values():
dom = creator()
if _good_enough(dom, features):
return dom
for creator in well_known_implementations.keys():
try:
dom = getDOMImplementation(name = creator)
except Exception: # typically ImportError, or AttributeError
continue
if _good_enough(dom, features):
return dom
raise ImportError("no suitable DOM implementation found")
def _parse_feature_string(s):
features = []
parts = s.split()
i = 0
length = len(parts)
while i < length:
feature = parts[i]
if feature[0] in "0123456789":
raise ValueError("bad feature name: %r" % (feature,))
i = i + 1
version = None
if i < length:
v = parts[i]
if v[0] in "0123456789":
i = i + 1
version = v
features.append((feature, version))
return tuple(features)

965
Lib/xml/dom/expatbuilder.py Normal file
View File

@@ -0,0 +1,965 @@
"""Facility to use the Expat parser to load a minidom instance
from a string or file.
This avoids all the overhead of SAX and pulldom to gain performance.
"""
# Warning!
#
# This module is tightly bound to the implementation details of the
# minidom DOM and can't be used with other DOM implementations. This
# is due, in part, to a lack of appropriate methods in the DOM (there is
# no way to create Entity and Notation nodes via the DOM Level 2
# interface), and for performance. The latter is the cause of some fairly
# cryptic code.
#
# Performance hacks:
#
# - .character_data_handler() has an extra case in which continuing
# data is appended to an existing Text node; this can be a
# speedup since pyexpat can break up character data into multiple
# callbacks even though we set the buffer_text attribute on the
# parser. This also gives us the advantage that we don't need a
# separate normalization pass.
#
# - Determining that a node exists is done using an identity comparison
# with None rather than a truth test; this avoids searching for and
# calling any methods on the node object if it exists. (A rather
# nice speedup is achieved this way as well!)
from xml.dom import xmlbuilder, minidom, Node
from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE
from xml.parsers import expat
from xml.dom.minidom import _append_child, _set_attribute_node
from xml.dom.NodeFilter import NodeFilter
TEXT_NODE = Node.TEXT_NODE
CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE
DOCUMENT_NODE = Node.DOCUMENT_NODE
FILTER_ACCEPT = xmlbuilder.DOMBuilderFilter.FILTER_ACCEPT
FILTER_REJECT = xmlbuilder.DOMBuilderFilter.FILTER_REJECT
FILTER_SKIP = xmlbuilder.DOMBuilderFilter.FILTER_SKIP
FILTER_INTERRUPT = xmlbuilder.DOMBuilderFilter.FILTER_INTERRUPT
theDOMImplementation = minidom.getDOMImplementation()
# Expat typename -> TypeInfo
_typeinfo_map = {
"CDATA": minidom.TypeInfo(None, "cdata"),
"ENUM": minidom.TypeInfo(None, "enumeration"),
"ENTITY": minidom.TypeInfo(None, "entity"),
"ENTITIES": minidom.TypeInfo(None, "entities"),
"ID": minidom.TypeInfo(None, "id"),
"IDREF": minidom.TypeInfo(None, "idref"),
"IDREFS": minidom.TypeInfo(None, "idrefs"),
"NMTOKEN": minidom.TypeInfo(None, "nmtoken"),
"NMTOKENS": minidom.TypeInfo(None, "nmtokens"),
}
class ElementInfo(object):
__slots__ = '_attr_info', '_model', 'tagName'
def __init__(self, tagName, model=None):
self.tagName = tagName
self._attr_info = []
self._model = model
def __getstate__(self):
return self._attr_info, self._model, self.tagName
def __setstate__(self, state):
self._attr_info, self._model, self.tagName = state
def getAttributeType(self, aname):
for info in self._attr_info:
if info[1] == aname:
t = info[-2]
if t[0] == "(":
return _typeinfo_map["ENUM"]
else:
return _typeinfo_map[info[-2]]
return minidom._no_type
def getAttributeTypeNS(self, namespaceURI, localName):
return minidom._no_type
def isElementContent(self):
if self._model:
type = self._model[0]
return type not in (expat.model.XML_CTYPE_ANY,
expat.model.XML_CTYPE_MIXED)
else:
return False
def isEmpty(self):
if self._model:
return self._model[0] == expat.model.XML_CTYPE_EMPTY
else:
return False
def isId(self, aname):
for info in self._attr_info:
if info[1] == aname:
return info[-2] == "ID"
return False
def isIdNS(self, euri, ename, auri, aname):
# not sure this is meaningful
return self.isId((auri, aname))
def _intern(builder, s):
return builder._intern_setdefault(s, s)
def _parse_ns_name(builder, name):
assert ' ' in name
parts = name.split(' ')
intern = builder._intern_setdefault
if len(parts) == 3:
uri, localname, prefix = parts
prefix = intern(prefix, prefix)
qname = "%s:%s" % (prefix, localname)
qname = intern(qname, qname)
localname = intern(localname, localname)
elif len(parts) == 2:
uri, localname = parts
prefix = EMPTY_PREFIX
qname = localname = intern(localname, localname)
else:
raise ValueError("Unsupported syntax: spaces in URIs not supported: %r" % name)
return intern(uri, uri), localname, prefix, qname
class ExpatBuilder:
"""Document builder that uses Expat to build a ParsedXML.DOM document
instance."""
def __init__(self, options=None):
if options is None:
options = xmlbuilder.Options()
self._options = options
if self._options.filter is not None:
self._filter = FilterVisibilityController(self._options.filter)
else:
self._filter = None
# This *really* doesn't do anything in this case, so
# override it with something fast & minimal.
self._finish_start_element = id
self._parser = None
self.reset()
def createParser(self):
"""Create a new parser object."""
return expat.ParserCreate()
def getParser(self):
"""Return the parser object, creating a new one if needed."""
if not self._parser:
self._parser = self.createParser()
self._intern_setdefault = self._parser.intern.setdefault
self._parser.buffer_text = True
self._parser.ordered_attributes = True
self._parser.specified_attributes = True
self.install(self._parser)
return self._parser
def reset(self):
"""Free all data structures used during DOM construction."""
self.document = theDOMImplementation.createDocument(
EMPTY_NAMESPACE, None, None)
self.curNode = self.document
self._elem_info = self.document._elem_info
self._cdata = False
def install(self, parser):
"""Install the callbacks needed to build the DOM into the parser."""
# This creates circular references!
parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
parser.StartElementHandler = self.first_element_handler
parser.EndElementHandler = self.end_element_handler
parser.ProcessingInstructionHandler = self.pi_handler
if self._options.entities:
parser.EntityDeclHandler = self.entity_decl_handler
parser.NotationDeclHandler = self.notation_decl_handler
if self._options.comments:
parser.CommentHandler = self.comment_handler
if self._options.cdata_sections:
parser.StartCdataSectionHandler = self.start_cdata_section_handler
parser.EndCdataSectionHandler = self.end_cdata_section_handler
parser.CharacterDataHandler = self.character_data_handler_cdata
else:
parser.CharacterDataHandler = self.character_data_handler
parser.ExternalEntityRefHandler = self.external_entity_ref_handler
parser.XmlDeclHandler = self.xml_decl_handler
parser.ElementDeclHandler = self.element_decl_handler
parser.AttlistDeclHandler = self.attlist_decl_handler
def parseFile(self, file):
"""Parse a document from a file object, returning the document
node."""
parser = self.getParser()
first_buffer = True
try:
while 1:
buffer = file.read(16*1024)
if not buffer:
break
parser.Parse(buffer, 0)
if first_buffer and self.document.documentElement:
self._setup_subset(buffer)
first_buffer = False
parser.Parse("", True)
except ParseEscape:
pass
doc = self.document
self.reset()
self._parser = None
return doc
def parseString(self, string):
"""Parse a document from a string, returning the document node."""
parser = self.getParser()
try:
parser.Parse(string, True)
self._setup_subset(string)
except ParseEscape:
pass
doc = self.document
self.reset()
self._parser = None
return doc
def _setup_subset(self, buffer):
"""Load the internal subset if there might be one."""
if self.document.doctype:
extractor = InternalSubsetExtractor()
extractor.parseString(buffer)
subset = extractor.getSubset()
self.document.doctype.internalSubset = subset
def start_doctype_decl_handler(self, doctypeName, systemId, publicId,
has_internal_subset):
doctype = self.document.implementation.createDocumentType(
doctypeName, publicId, systemId)
doctype.ownerDocument = self.document
_append_child(self.document, doctype)
self.document.doctype = doctype
if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT:
self.document.doctype = None
del self.document.childNodes[-1]
doctype = None
self._parser.EntityDeclHandler = None
self._parser.NotationDeclHandler = None
if has_internal_subset:
if doctype is not None:
doctype.entities._seq = []
doctype.notations._seq = []
self._parser.CommentHandler = None
self._parser.ProcessingInstructionHandler = None
self._parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler
def end_doctype_decl_handler(self):
if self._options.comments:
self._parser.CommentHandler = self.comment_handler
self._parser.ProcessingInstructionHandler = self.pi_handler
if not (self._elem_info or self._filter):
self._finish_end_element = id
def pi_handler(self, target, data):
node = self.document.createProcessingInstruction(target, data)
_append_child(self.curNode, node)
if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
self.curNode.removeChild(node)
def character_data_handler_cdata(self, data):
childNodes = self.curNode.childNodes
if self._cdata:
if ( self._cdata_continue
and childNodes[-1].nodeType == CDATA_SECTION_NODE):
childNodes[-1].appendData(data)
return
node = self.document.createCDATASection(data)
self._cdata_continue = True
elif childNodes and childNodes[-1].nodeType == TEXT_NODE:
node = childNodes[-1]
value = node.data + data
node.data = value
return
else:
node = minidom.Text()
node.data = data
node.ownerDocument = self.document
_append_child(self.curNode, node)
def character_data_handler(self, data):
childNodes = self.curNode.childNodes
if childNodes and childNodes[-1].nodeType == TEXT_NODE:
node = childNodes[-1]
node.data = node.data + data
return
node = minidom.Text()
node.data = node.data + data
node.ownerDocument = self.document
_append_child(self.curNode, node)
def entity_decl_handler(self, entityName, is_parameter_entity, value,
base, systemId, publicId, notationName):
if is_parameter_entity:
# we don't care about parameter entities for the DOM
return
if not self._options.entities:
return
node = self.document._create_entity(entityName, publicId,
systemId, notationName)
if value is not None:
# internal entity
# node *should* be readonly, but we'll cheat
child = self.document.createTextNode(value)
node.childNodes.append(child)
self.document.doctype.entities._seq.append(node)
if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
del self.document.doctype.entities._seq[-1]
def notation_decl_handler(self, notationName, base, systemId, publicId):
node = self.document._create_notation(notationName, publicId, systemId)
self.document.doctype.notations._seq.append(node)
if self._filter and self._filter.acceptNode(node) == FILTER_ACCEPT:
del self.document.doctype.notations._seq[-1]
def comment_handler(self, data):
node = self.document.createComment(data)
_append_child(self.curNode, node)
if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
self.curNode.removeChild(node)
def start_cdata_section_handler(self):
self._cdata = True
self._cdata_continue = False
def end_cdata_section_handler(self):
self._cdata = False
self._cdata_continue = False
def external_entity_ref_handler(self, context, base, systemId, publicId):
return 1
def first_element_handler(self, name, attributes):
if self._filter is None and not self._elem_info:
self._finish_end_element = id
self.getParser().StartElementHandler = self.start_element_handler
self.start_element_handler(name, attributes)
def start_element_handler(self, name, attributes):
node = self.document.createElement(name)
_append_child(self.curNode, node)
self.curNode = node
if attributes:
for i in range(0, len(attributes), 2):
a = minidom.Attr(attributes[i], EMPTY_NAMESPACE,
None, EMPTY_PREFIX)
value = attributes[i+1]
a.value = value
a.ownerDocument = self.document
_set_attribute_node(node, a)
if node is not self.document.documentElement:
self._finish_start_element(node)
def _finish_start_element(self, node):
if self._filter:
# To be general, we'd have to call isSameNode(), but this
# is sufficient for minidom:
if node is self.document.documentElement:
return
filt = self._filter.startContainer(node)
if filt == FILTER_REJECT:
# ignore this node & all descendents
Rejecter(self)
elif filt == FILTER_SKIP:
# ignore this node, but make it's children become
# children of the parent node
Skipper(self)
else:
return
self.curNode = node.parentNode
node.parentNode.removeChild(node)
node.unlink()
# If this ever changes, Namespaces.end_element_handler() needs to
# be changed to match.
#
def end_element_handler(self, name):
curNode = self.curNode
self.curNode = curNode.parentNode
self._finish_end_element(curNode)
def _finish_end_element(self, curNode):
info = self._elem_info.get(curNode.tagName)
if info:
self._handle_white_text_nodes(curNode, info)
if self._filter:
if curNode is self.document.documentElement:
return
if self._filter.acceptNode(curNode) == FILTER_REJECT:
self.curNode.removeChild(curNode)
curNode.unlink()
def _handle_white_text_nodes(self, node, info):
if (self._options.whitespace_in_element_content
or not info.isElementContent()):
return
# We have element type information and should remove ignorable
# whitespace; identify for text nodes which contain only
# whitespace.
L = []
for child in node.childNodes:
if child.nodeType == TEXT_NODE and not child.data.strip():
L.append(child)
# Remove ignorable whitespace from the tree.
for child in L:
node.removeChild(child)
def element_decl_handler(self, name, model):
info = self._elem_info.get(name)
if info is None:
self._elem_info[name] = ElementInfo(name, model)
else:
assert info._model is None
info._model = model
def attlist_decl_handler(self, elem, name, type, default, required):
info = self._elem_info.get(elem)
if info is None:
info = ElementInfo(elem)
self._elem_info[elem] = info
info._attr_info.append(
[None, name, None, None, default, 0, type, required])
def xml_decl_handler(self, version, encoding, standalone):
self.document.version = version
self.document.encoding = encoding
# This is still a little ugly, thanks to the pyexpat API. ;-(
if standalone >= 0:
if standalone:
self.document.standalone = True
else:
self.document.standalone = False
# Don't include FILTER_INTERRUPT, since that's checked separately
# where allowed.
_ALLOWED_FILTER_RETURNS = (FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP)
class FilterVisibilityController(object):
"""Wrapper around a DOMBuilderFilter which implements the checks
to make the whatToShow filter attribute work."""
__slots__ = 'filter',
def __init__(self, filter):
self.filter = filter
def startContainer(self, node):
mask = self._nodetype_mask[node.nodeType]
if self.filter.whatToShow & mask:
val = self.filter.startContainer(node)
if val == FILTER_INTERRUPT:
raise ParseEscape
if val not in _ALLOWED_FILTER_RETURNS:
raise ValueError(
"startContainer() returned illegal value: " + repr(val))
return val
else:
return FILTER_ACCEPT
def acceptNode(self, node):
mask = self._nodetype_mask[node.nodeType]
if self.filter.whatToShow & mask:
val = self.filter.acceptNode(node)
if val == FILTER_INTERRUPT:
raise ParseEscape
if val == FILTER_SKIP:
# move all child nodes to the parent, and remove this node
parent = node.parentNode
for child in node.childNodes[:]:
parent.appendChild(child)
# node is handled by the caller
return FILTER_REJECT
if val not in _ALLOWED_FILTER_RETURNS:
raise ValueError(
"acceptNode() returned illegal value: " + repr(val))
return val
else:
return FILTER_ACCEPT
_nodetype_mask = {
Node.ELEMENT_NODE: NodeFilter.SHOW_ELEMENT,
Node.ATTRIBUTE_NODE: NodeFilter.SHOW_ATTRIBUTE,
Node.TEXT_NODE: NodeFilter.SHOW_TEXT,
Node.CDATA_SECTION_NODE: NodeFilter.SHOW_CDATA_SECTION,
Node.ENTITY_REFERENCE_NODE: NodeFilter.SHOW_ENTITY_REFERENCE,
Node.ENTITY_NODE: NodeFilter.SHOW_ENTITY,
Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION,
Node.COMMENT_NODE: NodeFilter.SHOW_COMMENT,
Node.DOCUMENT_NODE: NodeFilter.SHOW_DOCUMENT,
Node.DOCUMENT_TYPE_NODE: NodeFilter.SHOW_DOCUMENT_TYPE,
Node.DOCUMENT_FRAGMENT_NODE: NodeFilter.SHOW_DOCUMENT_FRAGMENT,
Node.NOTATION_NODE: NodeFilter.SHOW_NOTATION,
}
class FilterCrutch(object):
__slots__ = '_builder', '_level', '_old_start', '_old_end'
def __init__(self, builder):
self._level = 0
self._builder = builder
parser = builder._parser
self._old_start = parser.StartElementHandler
self._old_end = parser.EndElementHandler
parser.StartElementHandler = self.start_element_handler
parser.EndElementHandler = self.end_element_handler
class Rejecter(FilterCrutch):
__slots__ = ()
def __init__(self, builder):
FilterCrutch.__init__(self, builder)
parser = builder._parser
for name in ("ProcessingInstructionHandler",
"CommentHandler",
"CharacterDataHandler",
"StartCdataSectionHandler",
"EndCdataSectionHandler",
"ExternalEntityRefHandler",
):
setattr(parser, name, None)
def start_element_handler(self, *args):
self._level = self._level + 1
def end_element_handler(self, *args):
if self._level == 0:
# restore the old handlers
parser = self._builder._parser
self._builder.install(parser)
parser.StartElementHandler = self._old_start
parser.EndElementHandler = self._old_end
else:
self._level = self._level - 1
class Skipper(FilterCrutch):
__slots__ = ()
def start_element_handler(self, *args):
node = self._builder.curNode
self._old_start(*args)
if self._builder.curNode is not node:
self._level = self._level + 1
def end_element_handler(self, *args):
if self._level == 0:
# We're popping back out of the node we're skipping, so we
# shouldn't need to do anything but reset the handlers.
self._builder._parser.StartElementHandler = self._old_start
self._builder._parser.EndElementHandler = self._old_end
self._builder = None
else:
self._level = self._level - 1
self._old_end(*args)
# framework document used by the fragment builder.
# Takes a string for the doctype, subset string, and namespace attrs string.
_FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \
"http://xml.python.org/entities/fragment-builder/internal"
_FRAGMENT_BUILDER_TEMPLATE = (
'''\
<!DOCTYPE wrapper
%%s [
<!ENTITY fragment-builder-internal
SYSTEM "%s">
%%s
]>
<wrapper %%s
>&fragment-builder-internal;</wrapper>'''
% _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID)
class FragmentBuilder(ExpatBuilder):
"""Builder which constructs document fragments given XML source
text and a context node.
The context node is expected to provide information about the
namespace declarations which are in scope at the start of the
fragment.
"""
def __init__(self, context, options=None):
if context.nodeType == DOCUMENT_NODE:
self.originalDocument = context
self.context = context
else:
self.originalDocument = context.ownerDocument
self.context = context
ExpatBuilder.__init__(self, options)
def reset(self):
ExpatBuilder.reset(self)
self.fragment = None
def parseFile(self, file):
"""Parse a document fragment from a file object, returning the
fragment node."""
return self.parseString(file.read())
def parseString(self, string):
"""Parse a document fragment from a string, returning the
fragment node."""
self._source = string
parser = self.getParser()
doctype = self.originalDocument.doctype
ident = ""
if doctype:
subset = doctype.internalSubset or self._getDeclarations()
if doctype.publicId:
ident = ('PUBLIC "%s" "%s"'
% (doctype.publicId, doctype.systemId))
elif doctype.systemId:
ident = 'SYSTEM "%s"' % doctype.systemId
else:
subset = ""
nsattrs = self._getNSattrs() # get ns decls from node's ancestors
document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs)
try:
parser.Parse(document, 1)
except:
self.reset()
raise
fragment = self.fragment
self.reset()
## self._parser = None
return fragment
def _getDeclarations(self):
"""Re-create the internal subset from the DocumentType node.
This is only needed if we don't already have the
internalSubset as a string.
"""
doctype = self.context.ownerDocument.doctype
s = ""
if doctype:
for i in range(doctype.notations.length):
notation = doctype.notations.item(i)
if s:
s = s + "\n "
s = "%s<!NOTATION %s" % (s, notation.nodeName)
if notation.publicId:
s = '%s PUBLIC "%s"\n "%s">' \
% (s, notation.publicId, notation.systemId)
else:
s = '%s SYSTEM "%s">' % (s, notation.systemId)
for i in range(doctype.entities.length):
entity = doctype.entities.item(i)
if s:
s = s + "\n "
s = "%s<!ENTITY %s" % (s, entity.nodeName)
if entity.publicId:
s = '%s PUBLIC "%s"\n "%s"' \
% (s, entity.publicId, entity.systemId)
elif entity.systemId:
s = '%s SYSTEM "%s"' % (s, entity.systemId)
else:
s = '%s "%s"' % (s, entity.firstChild.data)
if entity.notationName:
s = "%s NOTATION %s" % (s, entity.notationName)
s = s + ">"
return s
def _getNSattrs(self):
return ""
def external_entity_ref_handler(self, context, base, systemId, publicId):
if systemId == _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID:
# this entref is the one that we made to put the subtree
# in; all of our given input is parsed in here.
old_document = self.document
old_cur_node = self.curNode
parser = self._parser.ExternalEntityParserCreate(context)
# put the real document back, parse into the fragment to return
self.document = self.originalDocument
self.fragment = self.document.createDocumentFragment()
self.curNode = self.fragment
try:
parser.Parse(self._source, 1)
finally:
self.curNode = old_cur_node
self.document = old_document
self._source = None
return -1
else:
return ExpatBuilder.external_entity_ref_handler(
self, context, base, systemId, publicId)
class Namespaces:
"""Mix-in class for builders; adds support for namespaces."""
def _initNamespaces(self):
# list of (prefix, uri) ns declarations. Namespace attrs are
# constructed from this and added to the element's attrs.
self._ns_ordered_prefixes = []
def createParser(self):
"""Create a new namespace-handling parser."""
parser = expat.ParserCreate(namespace_separator=" ")
parser.namespace_prefixes = True
return parser
def install(self, parser):
"""Insert the namespace-handlers onto the parser."""
ExpatBuilder.install(self, parser)
if self._options.namespace_declarations:
parser.StartNamespaceDeclHandler = (
self.start_namespace_decl_handler)
def start_namespace_decl_handler(self, prefix, uri):
"""Push this namespace declaration on our storage."""
self._ns_ordered_prefixes.append((prefix, uri))
def start_element_handler(self, name, attributes):
if ' ' in name:
uri, localname, prefix, qname = _parse_ns_name(self, name)
else:
uri = EMPTY_NAMESPACE
qname = name
localname = None
prefix = EMPTY_PREFIX
node = minidom.Element(qname, uri, prefix, localname)
node.ownerDocument = self.document
_append_child(self.curNode, node)
self.curNode = node
if self._ns_ordered_prefixes:
for prefix, uri in self._ns_ordered_prefixes:
if prefix:
a = minidom.Attr(_intern(self, 'xmlns:' + prefix),
XMLNS_NAMESPACE, prefix, "xmlns")
else:
a = minidom.Attr("xmlns", XMLNS_NAMESPACE,
"xmlns", EMPTY_PREFIX)
a.value = uri
a.ownerDocument = self.document
_set_attribute_node(node, a)
del self._ns_ordered_prefixes[:]
if attributes:
node._ensure_attributes()
_attrs = node._attrs
_attrsNS = node._attrsNS
for i in range(0, len(attributes), 2):
aname = attributes[i]
value = attributes[i+1]
if ' ' in aname:
uri, localname, prefix, qname = _parse_ns_name(self, aname)
a = minidom.Attr(qname, uri, localname, prefix)
_attrs[qname] = a
_attrsNS[(uri, localname)] = a
else:
a = minidom.Attr(aname, EMPTY_NAMESPACE,
aname, EMPTY_PREFIX)
_attrs[aname] = a
_attrsNS[(EMPTY_NAMESPACE, aname)] = a
a.ownerDocument = self.document
a.value = value
a.ownerElement = node
if __debug__:
# This only adds some asserts to the original
# end_element_handler(), so we only define this when -O is not
# used. If changing one, be sure to check the other to see if
# it needs to be changed as well.
#
def end_element_handler(self, name):
curNode = self.curNode
if ' ' in name:
uri, localname, prefix, qname = _parse_ns_name(self, name)
assert (curNode.namespaceURI == uri
and curNode.localName == localname
and curNode.prefix == prefix), \
"element stack messed up! (namespace)"
else:
assert curNode.nodeName == name, \
"element stack messed up - bad nodeName"
assert curNode.namespaceURI == EMPTY_NAMESPACE, \
"element stack messed up - bad namespaceURI"
self.curNode = curNode.parentNode
self._finish_end_element(curNode)
class ExpatBuilderNS(Namespaces, ExpatBuilder):
"""Document builder that supports namespaces."""
def reset(self):
ExpatBuilder.reset(self)
self._initNamespaces()
class FragmentBuilderNS(Namespaces, FragmentBuilder):
"""Fragment builder that supports namespaces."""
def reset(self):
FragmentBuilder.reset(self)
self._initNamespaces()
def _getNSattrs(self):
"""Return string of namespace attributes from this element and
ancestors."""
# XXX This needs to be re-written to walk the ancestors of the
# context to build up the namespace information from
# declarations, elements, and attributes found in context.
# Otherwise we have to store a bunch more data on the DOM
# (though that *might* be more reliable -- not clear).
attrs = ""
context = self.context
L = []
while context:
if hasattr(context, '_ns_prefix_uri'):
for prefix, uri in context._ns_prefix_uri.items():
# add every new NS decl from context to L and attrs string
if prefix in L:
continue
L.append(prefix)
if prefix:
declname = "xmlns:" + prefix
else:
declname = "xmlns"
if attrs:
attrs = "%s\n %s='%s'" % (attrs, declname, uri)
else:
attrs = " %s='%s'" % (declname, uri)
context = context.parentNode
return attrs
class ParseEscape(Exception):
"""Exception raised to short-circuit parsing in InternalSubsetExtractor."""
pass
class InternalSubsetExtractor(ExpatBuilder):
"""XML processor which can rip out the internal document type subset."""
subset = None
def getSubset(self):
"""Return the internal subset as a string."""
return self.subset
def parseFile(self, file):
try:
ExpatBuilder.parseFile(self, file)
except ParseEscape:
pass
def parseString(self, string):
try:
ExpatBuilder.parseString(self, string)
except ParseEscape:
pass
def install(self, parser):
parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
parser.StartElementHandler = self.start_element_handler
def start_doctype_decl_handler(self, name, publicId, systemId,
has_internal_subset):
if has_internal_subset:
parser = self.getParser()
self.subset = []
parser.DefaultHandler = self.subset.append
parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler
else:
raise ParseEscape()
def end_doctype_decl_handler(self):
s = ''.join(self.subset).replace('\r\n', '\n').replace('\r', '\n')
self.subset = s
raise ParseEscape()
def start_element_handler(self, name, attrs):
raise ParseEscape()
def parse(file, namespaces=True):
"""Parse a document, returning the resulting Document node.
'file' may be either a file name or an open file object.
"""
if namespaces:
builder = ExpatBuilderNS()
else:
builder = ExpatBuilder()
if isinstance(file, str):
with open(file, 'rb') as fp:
result = builder.parseFile(fp)
else:
result = builder.parseFile(file)
return result
def parseString(string, namespaces=True):
"""Parse a document from a string, returning the resulting
Document node.
"""
if namespaces:
builder = ExpatBuilderNS()
else:
builder = ExpatBuilder()
return builder.parseString(string)
def parseFragment(file, context, namespaces=True):
"""Parse a fragment of a document, given the context from which it
was originally extracted. context should be the parent of the
node(s) which are in the fragment.
'file' may be either a file name or an open file object.
"""
if namespaces:
builder = FragmentBuilderNS(context)
else:
builder = FragmentBuilder(context)
if isinstance(file, str):
with open(file, 'rb') as fp:
result = builder.parseFile(fp)
else:
result = builder.parseFile(file)
return result
def parseFragmentString(string, context, namespaces=True):
"""Parse a fragment of a document from a string, given the context
from which it was originally extracted. context should be the
parent of the node(s) which are in the fragment.
"""
if namespaces:
builder = FragmentBuilderNS(context)
else:
builder = FragmentBuilder(context)
return builder.parseString(string)
def makeBuilder(options):
"""Create a builder based on an Options object."""
if options.namespaces:
return ExpatBuilderNS(options)
else:
return ExpatBuilder(options)

109
Lib/xml/dom/minicompat.py Normal file
View File

@@ -0,0 +1,109 @@
"""Python version compatibility support for minidom.
This module contains internal implementation details and
should not be imported; use xml.dom.minidom instead.
"""
# This module should only be imported using "import *".
#
# The following names are defined:
#
# NodeList -- lightest possible NodeList implementation
#
# EmptyNodeList -- lightest possible NodeList that is guaranteed to
# remain empty (immutable)
#
# StringTypes -- tuple of defined string types
#
# defproperty -- function used in conjunction with GetattrMagic;
# using these together is needed to make them work
# as efficiently as possible in both Python 2.2+
# and older versions. For example:
#
# class MyClass(GetattrMagic):
# def _get_myattr(self):
# return something
#
# defproperty(MyClass, "myattr",
# "return some value")
#
# For Python 2.2 and newer, this will construct a
# property object on the class, which avoids
# needing to override __getattr__(). It will only
# work for read-only attributes.
#
# For older versions of Python, inheriting from
# GetattrMagic will use the traditional
# __getattr__() hackery to achieve the same effect,
# but less efficiently.
#
# defproperty() should be used for each version of
# the relevant _get_<property>() function.
__all__ = ["NodeList", "EmptyNodeList", "StringTypes", "defproperty"]
import xml.dom
StringTypes = (str,)
class NodeList(list):
__slots__ = ()
def item(self, index):
if 0 <= index < len(self):
return self[index]
def _get_length(self):
return len(self)
def _set_length(self, value):
raise xml.dom.NoModificationAllowedErr(
"attempt to modify read-only attribute 'length'")
length = property(_get_length, _set_length,
doc="The number of nodes in the NodeList.")
# For backward compatibility
def __setstate__(self, state):
if state is None:
state = []
self[:] = state
class EmptyNodeList(tuple):
__slots__ = ()
def __add__(self, other):
NL = NodeList()
NL.extend(other)
return NL
def __radd__(self, other):
NL = NodeList()
NL.extend(other)
return NL
def item(self, index):
return None
def _get_length(self):
return 0
def _set_length(self, value):
raise xml.dom.NoModificationAllowedErr(
"attempt to modify read-only attribute 'length'")
length = property(_get_length, _set_length,
doc="The number of nodes in the NodeList.")
def defproperty(klass, name, doc):
get = getattr(klass, ("_get_" + name))
def set(self, value, name=name):
raise xml.dom.NoModificationAllowedErr(
"attempt to modify read-only attribute " + repr(name))
assert not hasattr(klass, "_set_" + name), \
"expected not to find _set_" + name
prop = property(get, set, doc=doc)
setattr(klass, name, prop)

1981
Lib/xml/dom/minidom.py Normal file

File diff suppressed because it is too large Load Diff

342
Lib/xml/dom/pulldom.py Normal file
View File

@@ -0,0 +1,342 @@
import xml.sax
import xml.sax.handler
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
COMMENT = "COMMENT"
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"
class PullDOM(xml.sax.ContentHandler):
_locator = None
document = None
def __init__(self, documentFactory=None):
from xml.dom import XML_NAMESPACE
self.documentFactory = documentFactory
self.firstEvent = [None, None]
self.lastEvent = self.firstEvent
self.elementStack = []
self.push = self.elementStack.append
try:
self.pop = self.elementStack.pop
except AttributeError:
# use class' pop instead
pass
self._ns_contexts = [{XML_NAMESPACE:'xml'}] # contains uri -> prefix dicts
self._current_context = self._ns_contexts[-1]
self.pending_events = []
def pop(self):
result = self.elementStack[-1]
del self.elementStack[-1]
return result
def setDocumentLocator(self, locator):
self._locator = locator
def startPrefixMapping(self, prefix, uri):
if not hasattr(self, '_xmlns_attrs'):
self._xmlns_attrs = []
self._xmlns_attrs.append((prefix or 'xmlns', uri))
self._ns_contexts.append(self._current_context.copy())
self._current_context[uri] = prefix or None
def endPrefixMapping(self, prefix):
self._current_context = self._ns_contexts.pop()
def startElementNS(self, name, tagName , attrs):
# Retrieve xml namespace declaration attributes.
xmlns_uri = 'http://www.w3.org/2000/xmlns/'
xmlns_attrs = getattr(self, '_xmlns_attrs', None)
if xmlns_attrs is not None:
for aname, value in xmlns_attrs:
attrs._attrs[(xmlns_uri, aname)] = value
self._xmlns_attrs = []
uri, localname = name
if uri:
# When using namespaces, the reader may or may not
# provide us with the original name. If not, create
# *a* valid tagName from the current context.
if tagName is None:
prefix = self._current_context[uri]
if prefix:
tagName = prefix + ":" + localname
else:
tagName = localname
if self.document:
node = self.document.createElementNS(uri, tagName)
else:
node = self.buildDocument(uri, tagName)
else:
# When the tagname is not prefixed, it just appears as
# localname
if self.document:
node = self.document.createElement(localname)
else:
node = self.buildDocument(None, localname)
for aname,value in attrs.items():
a_uri, a_localname = aname
if a_uri == xmlns_uri:
if a_localname == 'xmlns':
qname = a_localname
else:
qname = 'xmlns:' + a_localname
attr = self.document.createAttributeNS(a_uri, qname)
node.setAttributeNodeNS(attr)
elif a_uri:
prefix = self._current_context[a_uri]
if prefix:
qname = prefix + ":" + a_localname
else:
qname = a_localname
attr = self.document.createAttributeNS(a_uri, qname)
node.setAttributeNodeNS(attr)
else:
attr = self.document.createAttribute(a_localname)
node.setAttributeNode(attr)
attr.value = value
self.lastEvent[1] = [(START_ELEMENT, node), None]
self.lastEvent = self.lastEvent[1]
self.push(node)
def endElementNS(self, name, tagName):
self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
self.lastEvent = self.lastEvent[1]
def startElement(self, name, attrs):
if self.document:
node = self.document.createElement(name)
else:
node = self.buildDocument(None, name)
for aname,value in attrs.items():
attr = self.document.createAttribute(aname)
attr.value = value
node.setAttributeNode(attr)
self.lastEvent[1] = [(START_ELEMENT, node), None]
self.lastEvent = self.lastEvent[1]
self.push(node)
def endElement(self, name):
self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
self.lastEvent = self.lastEvent[1]
def comment(self, s):
if self.document:
node = self.document.createComment(s)
self.lastEvent[1] = [(COMMENT, node), None]
self.lastEvent = self.lastEvent[1]
else:
event = [(COMMENT, s), None]
self.pending_events.append(event)
def processingInstruction(self, target, data):
if self.document:
node = self.document.createProcessingInstruction(target, data)
self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None]
self.lastEvent = self.lastEvent[1]
else:
event = [(PROCESSING_INSTRUCTION, target, data), None]
self.pending_events.append(event)
def ignorableWhitespace(self, chars):
node = self.document.createTextNode(chars)
self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]
self.lastEvent = self.lastEvent[1]
def characters(self, chars):
node = self.document.createTextNode(chars)
self.lastEvent[1] = [(CHARACTERS, node), None]
self.lastEvent = self.lastEvent[1]
def startDocument(self):
if self.documentFactory is None:
import xml.dom.minidom
self.documentFactory = xml.dom.minidom.Document.implementation
def buildDocument(self, uri, tagname):
# Can't do that in startDocument, since we need the tagname
# XXX: obtain DocumentType
node = self.documentFactory.createDocument(uri, tagname, None)
self.document = node
self.lastEvent[1] = [(START_DOCUMENT, node), None]
self.lastEvent = self.lastEvent[1]
self.push(node)
# Put everything we have seen so far into the document
for e in self.pending_events:
if e[0][0] == PROCESSING_INSTRUCTION:
_,target,data = e[0]
n = self.document.createProcessingInstruction(target, data)
e[0] = (PROCESSING_INSTRUCTION, n)
elif e[0][0] == COMMENT:
n = self.document.createComment(e[0][1])
e[0] = (COMMENT, n)
else:
raise AssertionError("Unknown pending event ",e[0][0])
self.lastEvent[1] = e
self.lastEvent = e
self.pending_events = None
return node.firstChild
def endDocument(self):
self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
self.pop()
def clear(self):
"clear(): Explicitly release parsing structures"
self.document = None
class ErrorHandler:
def warning(self, exception):
print(exception)
def error(self, exception):
raise exception
def fatalError(self, exception):
raise exception
class DOMEventStream:
def __init__(self, stream, parser, bufsize):
self.stream = stream
self.parser = parser
self.bufsize = bufsize
if not hasattr(self.parser, 'feed'):
self.getEvent = self._slurp
self.reset()
def reset(self):
self.pulldom = PullDOM()
# This content handler relies on namespace support
self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
self.parser.setContentHandler(self.pulldom)
def __getitem__(self, pos):
rc = self.getEvent()
if rc:
return rc
raise IndexError
def __next__(self):
rc = self.getEvent()
if rc:
return rc
raise StopIteration
def __iter__(self):
return self
def expandNode(self, node):
event = self.getEvent()
parents = [node]
while event:
token, cur_node = event
if cur_node is node:
return
if token != END_ELEMENT:
parents[-1].appendChild(cur_node)
if token == START_ELEMENT:
parents.append(cur_node)
elif token == END_ELEMENT:
del parents[-1]
event = self.getEvent()
def getEvent(self):
# use IncrementalParser interface, so we get the desired
# pull effect
if not self.pulldom.firstEvent[1]:
self.pulldom.lastEvent = self.pulldom.firstEvent
while not self.pulldom.firstEvent[1]:
buf = self.stream.read(self.bufsize)
if not buf:
self.parser.close()
return None
self.parser.feed(buf)
rc = self.pulldom.firstEvent[1][0]
self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
return rc
def _slurp(self):
""" Fallback replacement for getEvent() using the
standard SAX2 interface, which means we slurp the
SAX events into memory (no performance gain, but
we are compatible to all SAX parsers).
"""
self.parser.parse(self.stream)
self.getEvent = self._emit
return self._emit()
def _emit(self):
""" Fallback replacement for getEvent() that emits
the events that _slurp() read previously.
"""
rc = self.pulldom.firstEvent[1][0]
self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
return rc
def clear(self):
"""clear(): Explicitly release parsing objects"""
self.pulldom.clear()
del self.pulldom
self.parser = None
self.stream = None
class SAX2DOM(PullDOM):
def startElementNS(self, name, tagName , attrs):
PullDOM.startElementNS(self, name, tagName, attrs)
curNode = self.elementStack[-1]
parentNode = self.elementStack[-2]
parentNode.appendChild(curNode)
def startElement(self, name, attrs):
PullDOM.startElement(self, name, attrs)
curNode = self.elementStack[-1]
parentNode = self.elementStack[-2]
parentNode.appendChild(curNode)
def processingInstruction(self, target, data):
PullDOM.processingInstruction(self, target, data)
node = self.lastEvent[0][1]
parentNode = self.elementStack[-1]
parentNode.appendChild(node)
def ignorableWhitespace(self, chars):
PullDOM.ignorableWhitespace(self, chars)
node = self.lastEvent[0][1]
parentNode = self.elementStack[-1]
parentNode.appendChild(node)
def characters(self, chars):
PullDOM.characters(self, chars)
node = self.lastEvent[0][1]
parentNode = self.elementStack[-1]
parentNode.appendChild(node)
default_bufsize = (2 ** 14) - 20
def parse(stream_or_string, parser=None, bufsize=None):
if bufsize is None:
bufsize = default_bufsize
if isinstance(stream_or_string, str):
stream = open(stream_or_string, 'rb')
else:
stream = stream_or_string
if not parser:
parser = xml.sax.make_parser()
return DOMEventStream(stream, parser, bufsize)
def parseString(string, parser=None):
from io import StringIO
bufsize = len(string)
buf = StringIO(string)
if not parser:
parser = xml.sax.make_parser()
return DOMEventStream(buf, parser, bufsize)

410
Lib/xml/dom/xmlbuilder.py Normal file
View File

@@ -0,0 +1,410 @@
"""Implementation of the DOM Level 3 'LS-Load' feature."""
import copy
import warnings
import xml.dom
from xml.dom.NodeFilter import NodeFilter
__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
class Options:
"""Features object that has variables set for each DOMBuilder feature.
The DOMBuilder class uses an instance of this class to pass settings to
the ExpatBuilder class.
"""
# Note that the DOMBuilder class in LoadSave constrains which of these
# values can be set using the DOM Level 3 LoadSave feature.
namespaces = 1
namespace_declarations = True
validation = False
external_parameter_entities = True
external_general_entities = True
external_dtd_subset = True
validate_if_schema = False
validate = False
datatype_normalization = False
create_entity_ref_nodes = True
entities = True
whitespace_in_element_content = True
cdata_sections = True
comments = True
charset_overrides_xml_encoding = True
infoset = False
supported_mediatypes_only = False
errorHandler = None
filter = None
class DOMBuilder:
entityResolver = None
errorHandler = None
filter = None
ACTION_REPLACE = 1
ACTION_APPEND_AS_CHILDREN = 2
ACTION_INSERT_AFTER = 3
ACTION_INSERT_BEFORE = 4
_legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)
def __init__(self):
self._options = Options()
def _get_entityResolver(self):
return self.entityResolver
def _set_entityResolver(self, entityResolver):
self.entityResolver = entityResolver
def _get_errorHandler(self):
return self.errorHandler
def _set_errorHandler(self, errorHandler):
self.errorHandler = errorHandler
def _get_filter(self):
return self.filter
def _set_filter(self, filter):
self.filter = filter
def setFeature(self, name, state):
if self.supportsFeature(name):
state = state and 1 or 0
try:
settings = self._settings[(_name_xform(name), state)]
except KeyError:
raise xml.dom.NotSupportedErr(
"unsupported feature: %r" % (name,))
else:
for name, value in settings:
setattr(self._options, name, value)
else:
raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
def supportsFeature(self, name):
return hasattr(self._options, _name_xform(name))
def canSetFeature(self, name, state):
key = (_name_xform(name), state and 1 or 0)
return key in self._settings
# This dictionary maps from (feature,value) to a list of
# (option,value) pairs that should be set on the Options object.
# If a (feature,value) setting is not in this dictionary, it is
# not supported by the DOMBuilder.
#
_settings = {
("namespace_declarations", 0): [
("namespace_declarations", 0)],
("namespace_declarations", 1): [
("namespace_declarations", 1)],
("validation", 0): [
("validation", 0)],
("external_general_entities", 0): [
("external_general_entities", 0)],
("external_general_entities", 1): [
("external_general_entities", 1)],
("external_parameter_entities", 0): [
("external_parameter_entities", 0)],
("external_parameter_entities", 1): [
("external_parameter_entities", 1)],
("validate_if_schema", 0): [
("validate_if_schema", 0)],
("create_entity_ref_nodes", 0): [
("create_entity_ref_nodes", 0)],
("create_entity_ref_nodes", 1): [
("create_entity_ref_nodes", 1)],
("entities", 0): [
("create_entity_ref_nodes", 0),
("entities", 0)],
("entities", 1): [
("entities", 1)],
("whitespace_in_element_content", 0): [
("whitespace_in_element_content", 0)],
("whitespace_in_element_content", 1): [
("whitespace_in_element_content", 1)],
("cdata_sections", 0): [
("cdata_sections", 0)],
("cdata_sections", 1): [
("cdata_sections", 1)],
("comments", 0): [
("comments", 0)],
("comments", 1): [
("comments", 1)],
("charset_overrides_xml_encoding", 0): [
("charset_overrides_xml_encoding", 0)],
("charset_overrides_xml_encoding", 1): [
("charset_overrides_xml_encoding", 1)],
("infoset", 0): [],
("infoset", 1): [
("namespace_declarations", 0),
("validate_if_schema", 0),
("create_entity_ref_nodes", 0),
("entities", 0),
("cdata_sections", 0),
("datatype_normalization", 1),
("whitespace_in_element_content", 1),
("comments", 1),
("charset_overrides_xml_encoding", 1)],
("supported_mediatypes_only", 0): [
("supported_mediatypes_only", 0)],
("namespaces", 0): [
("namespaces", 0)],
("namespaces", 1): [
("namespaces", 1)],
}
def getFeature(self, name):
xname = _name_xform(name)
try:
return getattr(self._options, xname)
except AttributeError:
if name == "infoset":
options = self._options
return (options.datatype_normalization
and options.whitespace_in_element_content
and options.comments
and options.charset_overrides_xml_encoding
and not (options.namespace_declarations
or options.validate_if_schema
or options.create_entity_ref_nodes
or options.entities
or options.cdata_sections))
raise xml.dom.NotFoundErr("feature %s not known" % repr(name))
def parseURI(self, uri):
if self.entityResolver:
input = self.entityResolver.resolveEntity(None, uri)
else:
input = DOMEntityResolver().resolveEntity(None, uri)
return self.parse(input)
def parse(self, input):
options = copy.copy(self._options)
options.filter = self.filter
options.errorHandler = self.errorHandler
fp = input.byteStream
if fp is None and options.systemId:
import urllib.request
fp = urllib.request.urlopen(input.systemId)
return self._parse_bytestream(fp, options)
def parseWithContext(self, input, cnode, action):
if action not in self._legal_actions:
raise ValueError("not a legal action")
raise NotImplementedError("Haven't written this yet...")
def _parse_bytestream(self, stream, options):
import xml.dom.expatbuilder
builder = xml.dom.expatbuilder.makeBuilder(options)
return builder.parseFile(stream)
def _name_xform(name):
return name.lower().replace('-', '_')
class DOMEntityResolver(object):
__slots__ = '_opener',
def resolveEntity(self, publicId, systemId):
assert systemId is not None
source = DOMInputSource()
source.publicId = publicId
source.systemId = systemId
source.byteStream = self._get_opener().open(systemId)
# determine the encoding if the transport provided it
source.encoding = self._guess_media_encoding(source)
# determine the base URI is we can
import posixpath, urllib.parse
parts = urllib.parse.urlparse(systemId)
scheme, netloc, path, params, query, fragment = parts
# XXX should we check the scheme here as well?
if path and not path.endswith("/"):
path = posixpath.dirname(path) + "/"
parts = scheme, netloc, path, params, query, fragment
source.baseURI = urllib.parse.urlunparse(parts)
return source
def _get_opener(self):
try:
return self._opener
except AttributeError:
self._opener = self._create_opener()
return self._opener
def _create_opener(self):
import urllib.request
return urllib.request.build_opener()
def _guess_media_encoding(self, source):
info = source.byteStream.info()
if "Content-Type" in info:
for param in info.getplist():
if param.startswith("charset="):
return param.split("=", 1)[1].lower()
class DOMInputSource(object):
__slots__ = ('byteStream', 'characterStream', 'stringData',
'encoding', 'publicId', 'systemId', 'baseURI')
def __init__(self):
self.byteStream = None
self.characterStream = None
self.stringData = None
self.encoding = None
self.publicId = None
self.systemId = None
self.baseURI = None
def _get_byteStream(self):
return self.byteStream
def _set_byteStream(self, byteStream):
self.byteStream = byteStream
def _get_characterStream(self):
return self.characterStream
def _set_characterStream(self, characterStream):
self.characterStream = characterStream
def _get_stringData(self):
return self.stringData
def _set_stringData(self, data):
self.stringData = data
def _get_encoding(self):
return self.encoding
def _set_encoding(self, encoding):
self.encoding = encoding
def _get_publicId(self):
return self.publicId
def _set_publicId(self, publicId):
self.publicId = publicId
def _get_systemId(self):
return self.systemId
def _set_systemId(self, systemId):
self.systemId = systemId
def _get_baseURI(self):
return self.baseURI
def _set_baseURI(self, uri):
self.baseURI = uri
class DOMBuilderFilter:
"""Element filter which can be used to tailor construction of
a DOM instance.
"""
# There's really no need for this class; concrete implementations
# should just implement the endElement() and startElement()
# methods as appropriate. Using this makes it easy to only
# implement one of them.
FILTER_ACCEPT = 1
FILTER_REJECT = 2
FILTER_SKIP = 3
FILTER_INTERRUPT = 4
whatToShow = NodeFilter.SHOW_ALL
def _get_whatToShow(self):
return self.whatToShow
def acceptNode(self, element):
return self.FILTER_ACCEPT
def startContainer(self, element):
return self.FILTER_ACCEPT
del NodeFilter
class _AsyncDeprecatedProperty:
def warn(self, cls):
clsname = cls.__name__
warnings.warn(
"{cls}.async is deprecated; use {cls}.async_".format(cls=clsname),
DeprecationWarning)
def __get__(self, instance, cls):
self.warn(cls)
if instance is not None:
return instance.async_
return False
def __set__(self, instance, value):
self.warn(type(instance))
setattr(instance, 'async_', value)
class DocumentLS:
"""Mixin to create documents that conform to the load/save spec."""
async_ = False
locals()['async'] = _AsyncDeprecatedProperty() # Avoid DeprecationWarning
def _get_async(self):
return False
def _set_async(self, flag):
if flag:
raise xml.dom.NotSupportedErr(
"asynchronous document loading is not supported")
def abort(self):
# What does it mean to "clear" a document? Does the
# documentElement disappear?
raise NotImplementedError(
"haven't figured out what this means yet")
def load(self, uri):
raise NotImplementedError("haven't written this yet")
def loadXML(self, source):
raise NotImplementedError("haven't written this yet")
def saveXML(self, snode):
if snode is None:
snode = self
elif snode.ownerDocument is not self:
raise xml.dom.WrongDocumentErr()
return snode.toxml()
del _AsyncDeprecatedProperty
class DOMImplementationLS:
MODE_SYNCHRONOUS = 1
MODE_ASYNCHRONOUS = 2
def createDOMBuilder(self, mode, schemaType):
if schemaType is not None:
raise xml.dom.NotSupportedErr(
"schemaType not yet supported")
if mode == self.MODE_SYNCHRONOUS:
return DOMBuilder()
if mode == self.MODE_ASYNCHRONOUS:
raise xml.dom.NotSupportedErr(
"asynchronous builders are not supported")
raise ValueError("unknown value for mode")
def createDOMWriter(self):
raise NotImplementedError(
"the writer interface hasn't been written yet!")
def createDOMInputSource(self):
return DOMInputSource()

View File

@@ -0,0 +1,143 @@
#
# ElementTree
# $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $
#
# limited xinclude support for element trees
#
# history:
# 2003-08-15 fl created
# 2003-11-14 fl fixed default loader
#
# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
#
# fredrik@pythonware.com
# http://www.pythonware.com
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2008 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------
# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/psf/license for licensing details.
##
# Limited XInclude support for the ElementTree package.
##
import copy
from . import ElementTree
XINCLUDE = "{http://www.w3.org/2001/XInclude}"
XINCLUDE_INCLUDE = XINCLUDE + "include"
XINCLUDE_FALLBACK = XINCLUDE + "fallback"
##
# Fatal include error.
class FatalIncludeError(SyntaxError):
pass
##
# Default loader. This loader reads an included resource from disk.
#
# @param href Resource reference.
# @param parse Parse mode. Either "xml" or "text".
# @param encoding Optional text encoding (UTF-8 by default for "text").
# @return The expanded resource. If the parse mode is "xml", this
# is an ElementTree instance. If the parse mode is "text", this
# is a Unicode string. If the loader fails, it can return None
# or raise an OSError exception.
# @throws OSError If the loader fails to load the resource.
def default_loader(href, parse, encoding=None):
if parse == "xml":
with open(href, 'rb') as file:
data = ElementTree.parse(file).getroot()
else:
if not encoding:
encoding = 'UTF-8'
with open(href, 'r', encoding=encoding) as file:
data = file.read()
return data
##
# Expand XInclude directives.
#
# @param elem Root element.
# @param loader Optional resource loader. If omitted, it defaults
# to {@link default_loader}. If given, it should be a callable
# that implements the same interface as <b>default_loader</b>.
# @throws FatalIncludeError If the function fails to include a given
# resource, or if the tree contains malformed XInclude elements.
# @throws OSError If the function fails to load a given resource.
def include(elem, loader=None):
if loader is None:
loader = default_loader
# look for xinclude elements
i = 0
while i < len(elem):
e = elem[i]
if e.tag == XINCLUDE_INCLUDE:
# process xinclude directive
href = e.get("href")
parse = e.get("parse", "xml")
if parse == "xml":
node = loader(href, parse)
if node is None:
raise FatalIncludeError(
"cannot load %r as %r" % (href, parse)
)
node = copy.copy(node)
if e.tail:
node.tail = (node.tail or "") + e.tail
elem[i] = node
elif parse == "text":
text = loader(href, parse, e.get("encoding"))
if text is None:
raise FatalIncludeError(
"cannot load %r as %r" % (href, parse)
)
if i:
node = elem[i-1]
node.tail = (node.tail or "") + text + (e.tail or "")
else:
elem.text = (elem.text or "") + text + (e.tail or "")
del elem[i]
continue
else:
raise FatalIncludeError(
"unknown parse type in xi:include tag (%r)" % parse
)
elif e.tag == XINCLUDE_FALLBACK:
raise FatalIncludeError(
"xi:fallback tag must be child of xi:include (%r)" % e.tag
)
else:
include(e, loader)
i = i + 1

View File

@@ -0,0 +1,314 @@
#
# ElementTree
# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
#
# limited xpath support for element trees
#
# history:
# 2003-05-23 fl created
# 2003-05-28 fl added support for // etc
# 2003-08-27 fl fixed parsing of periods in element names
# 2007-09-10 fl new selection engine
# 2007-09-12 fl fixed parent selector
# 2007-09-13 fl added iterfind; changed findall to return a list
# 2007-11-30 fl added namespaces support
# 2009-10-30 fl added child element value filter
#
# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
#
# fredrik@pythonware.com
# http://www.pythonware.com
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2009 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------
# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/psf/license for licensing details.
##
# Implementation module for XPath support. There's usually no reason
# to import this module directly; the <b>ElementTree</b> does this for
# you, if needed.
##
import re
xpath_tokenizer_re = re.compile(
r"("
r"'[^']*'|\"[^\"]*\"|"
r"::|"
r"//?|"
r"\.\.|"
r"\(\)|"
r"[/.*:\[\]\(\)@=])|"
r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
r"\s+"
)
def xpath_tokenizer(pattern, namespaces=None):
for token in xpath_tokenizer_re.findall(pattern):
tag = token[1]
if tag and tag[0] != "{" and ":" in tag:
try:
prefix, uri = tag.split(":", 1)
if not namespaces:
raise KeyError
yield token[0], "{%s}%s" % (namespaces[prefix], uri)
except KeyError:
raise SyntaxError("prefix %r not found in prefix map" % prefix)
else:
yield token
def get_parent_map(context):
parent_map = context.parent_map
if parent_map is None:
context.parent_map = parent_map = {}
for p in context.root.iter():
for e in p:
parent_map[e] = p
return parent_map
def prepare_child(next, token):
tag = token[1]
def select(context, result):
for elem in result:
for e in elem:
if e.tag == tag:
yield e
return select
def prepare_star(next, token):
def select(context, result):
for elem in result:
yield from elem
return select
def prepare_self(next, token):
def select(context, result):
yield from result
return select
def prepare_descendant(next, token):
try:
token = next()
except StopIteration:
return
if token[0] == "*":
tag = "*"
elif not token[0]:
tag = token[1]
else:
raise SyntaxError("invalid descendant")
def select(context, result):
for elem in result:
for e in elem.iter(tag):
if e is not elem:
yield e
return select
def prepare_parent(next, token):
def select(context, result):
# FIXME: raise error if .. is applied at toplevel?
parent_map = get_parent_map(context)
result_map = {}
for elem in result:
if elem in parent_map:
parent = parent_map[elem]
if parent not in result_map:
result_map[parent] = None
yield parent
return select
def prepare_predicate(next, token):
# FIXME: replace with real parser!!! refs:
# http://effbot.org/zone/simple-iterator-parser.htm
# http://javascript.crockford.com/tdop/tdop.html
signature = []
predicate = []
while 1:
try:
token = next()
except StopIteration:
return
if token[0] == "]":
break
if token[0] and token[0][:1] in "'\"":
token = "'", token[0][1:-1]
signature.append(token[0] or "-")
predicate.append(token[1])
signature = "".join(signature)
# use signature to determine predicate type
if signature == "@-":
# [@attribute] predicate
key = predicate[1]
def select(context, result):
for elem in result:
if elem.get(key) is not None:
yield elem
return select
if signature == "@-='":
# [@attribute='value']
key = predicate[1]
value = predicate[-1]
def select(context, result):
for elem in result:
if elem.get(key) == value:
yield elem
return select
if signature == "-" and not re.match(r"\-?\d+$", predicate[0]):
# [tag]
tag = predicate[0]
def select(context, result):
for elem in result:
if elem.find(tag) is not None:
yield elem
return select
if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]):
# [tag='value']
tag = predicate[0]
value = predicate[-1]
def select(context, result):
for elem in result:
for e in elem.findall(tag):
if "".join(e.itertext()) == value:
yield elem
break
return select
if signature == "-" or signature == "-()" or signature == "-()-":
# [index] or [last()] or [last()-index]
if signature == "-":
# [index]
index = int(predicate[0]) - 1
if index < 0:
raise SyntaxError("XPath position >= 1 expected")
else:
if predicate[0] != "last":
raise SyntaxError("unsupported function")
if signature == "-()-":
try:
index = int(predicate[2]) - 1
except ValueError:
raise SyntaxError("unsupported expression")
if index > -2:
raise SyntaxError("XPath offset from last() must be negative")
else:
index = -1
def select(context, result):
parent_map = get_parent_map(context)
for elem in result:
try:
parent = parent_map[elem]
# FIXME: what if the selector is "*" ?
elems = list(parent.findall(elem.tag))
if elems[index] is elem:
yield elem
except (IndexError, KeyError):
pass
return select
raise SyntaxError("invalid predicate")
ops = {
"": prepare_child,
"*": prepare_star,
".": prepare_self,
"..": prepare_parent,
"//": prepare_descendant,
"[": prepare_predicate,
}
_cache = {}
class _SelectorContext:
parent_map = None
def __init__(self, root):
self.root = root
# --------------------------------------------------------------------
##
# Generate all matching objects.
def iterfind(elem, path, namespaces=None):
# compile selector pattern
cache_key = (path, None if namespaces is None
else tuple(sorted(namespaces.items())))
if path[-1:] == "/":
path = path + "*" # implicit all (FIXME: keep this?)
try:
selector = _cache[cache_key]
except KeyError:
if len(_cache) > 100:
_cache.clear()
if path[:1] == "/":
raise SyntaxError("cannot use absolute path on element")
next = iter(xpath_tokenizer(path, namespaces)).__next__
try:
token = next()
except StopIteration:
return
selector = []
while 1:
try:
selector.append(ops[token[0]](next, token))
except StopIteration:
raise SyntaxError("invalid path")
try:
token = next()
if token[0] == "/":
token = next()
except StopIteration:
break
_cache[cache_key] = selector
# execute selector pattern
result = [elem]
context = _SelectorContext(elem)
for select in selector:
result = select(context, result)
return result
##
# Find first matching object.
def find(elem, path, namespaces=None):
return next(iterfind(elem, path, namespaces), None)
##
# Find all matching objects.
def findall(elem, path, namespaces=None):
return list(iterfind(elem, path, namespaces))
##
# Find text for first matching object.
def findtext(elem, path, default=None, namespaces=None):
try:
elem = next(iterfind(elem, path, namespaces))
return elem.text or ""
except StopIteration:
return default

1656
Lib/xml/etree/ElementTree.py Normal file

File diff suppressed because it is too large Load Diff

33
Lib/xml/etree/__init__.py Normal file
View File

@@ -0,0 +1,33 @@
# $Id: __init__.py 3375 2008-02-13 08:05:08Z fredrik $
# elementtree package
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2008 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------
# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/psf/license for licensing details.

View File

@@ -0,0 +1,3 @@
# Deprecated alias for xml.etree.ElementTree
from xml.etree.ElementTree import *

View File

@@ -0,0 +1,8 @@
"""Python interfaces to XML parsers.
This package contains one module:
expat -- Python wrapper for James Clark's Expat parser, with namespace
support.
"""

8
Lib/xml/parsers/expat.py Normal file
View File

@@ -0,0 +1,8 @@
"""Interface to the Expat non-validating XML parser."""
import sys
from pyexpat import *
# provide pyexpat submodules as xml.parsers.expat submodules
sys.modules['xml.parsers.expat.model'] = model
sys.modules['xml.parsers.expat.errors'] = errors

107
Lib/xml/sax/__init__.py Normal file
View File

@@ -0,0 +1,107 @@
"""Simple API for XML (SAX) implementation for Python.
This module provides an implementation of the SAX 2 interface;
information about the Java version of the interface can be found at
http://www.megginson.com/SAX/. The Python version of the interface is
documented at <...>.
This package contains the following modules:
handler -- Base classes and constants which define the SAX 2 API for
the 'client-side' of SAX for Python.
saxutils -- Implementation of the convenience classes commonly used to
work with SAX.
xmlreader -- Base classes and constants which define the SAX 2 API for
the parsers used with SAX for Python.
expatreader -- Driver that allows use of the Expat parser with SAX.
"""
from .xmlreader import InputSource
from .handler import ContentHandler, ErrorHandler
from ._exceptions import SAXException, SAXNotRecognizedException, \
SAXParseException, SAXNotSupportedException, \
SAXReaderNotAvailable
def parse(source, handler, errorHandler=ErrorHandler()):
parser = make_parser()
parser.setContentHandler(handler)
parser.setErrorHandler(errorHandler)
parser.parse(source)
def parseString(string, handler, errorHandler=ErrorHandler()):
import io
if errorHandler is None:
errorHandler = ErrorHandler()
parser = make_parser()
parser.setContentHandler(handler)
parser.setErrorHandler(errorHandler)
inpsrc = InputSource()
if isinstance(string, str):
inpsrc.setCharacterStream(io.StringIO(string))
else:
inpsrc.setByteStream(io.BytesIO(string))
parser.parse(inpsrc)
# this is the parser list used by the make_parser function if no
# alternatives are given as parameters to the function
default_parser_list = ["xml.sax.expatreader"]
# tell modulefinder that importing sax potentially imports expatreader
_false = 0
if _false:
import xml.sax.expatreader
import os, sys
if not sys.flags.ignore_environment and "PY_SAX_PARSER" in os.environ:
default_parser_list = os.environ["PY_SAX_PARSER"].split(",")
del os
_key = "python.xml.sax.parser"
if sys.platform[:4] == "java" and sys.registry.containsKey(_key):
default_parser_list = sys.registry.getProperty(_key).split(",")
def make_parser(parser_list = []):
"""Creates and returns a SAX parser.
Creates the first parser it is able to instantiate of the ones
given in the list created by doing parser_list +
default_parser_list. The lists must contain the names of Python
modules containing both a SAX parser and a create_parser function."""
for parser_name in parser_list + default_parser_list:
try:
return _create_parser(parser_name)
except ImportError as e:
import sys
if parser_name in sys.modules:
# The parser module was found, but importing it
# failed unexpectedly, pass this exception through
raise
except SAXReaderNotAvailable:
# The parser module detected that it won't work properly,
# so try the next one
pass
raise SAXReaderNotAvailable("No parsers found", None)
# --- Internal utility methods used by make_parser
if sys.platform[ : 4] == "java":
def _create_parser(parser_name):
from org.python.core import imp
drv_module = imp.importName(parser_name, 0, globals())
return drv_module.create_parser()
else:
def _create_parser(parser_name):
drv_module = __import__(parser_name,{},{},['create_parser'])
return drv_module.create_parser()
del sys

131
Lib/xml/sax/_exceptions.py Normal file
View File

@@ -0,0 +1,131 @@
"""Different kinds of SAX Exceptions"""
import sys
if sys.platform[:4] == "java":
from java.lang import Exception
del sys
# ===== SAXEXCEPTION =====
class SAXException(Exception):
"""Encapsulate an XML error or warning. This class can contain
basic error or warning information from either the XML parser or
the application: you can subclass it to provide additional
functionality, or to add localization. Note that although you will
receive a SAXException as the argument to the handlers in the
ErrorHandler interface, you are not actually required to raise
the exception; instead, you can simply read the information in
it."""
def __init__(self, msg, exception=None):
"""Creates an exception. The message is required, but the exception
is optional."""
self._msg = msg
self._exception = exception
Exception.__init__(self, msg)
def getMessage(self):
"Return a message for this exception."
return self._msg
def getException(self):
"Return the embedded exception, or None if there was none."
return self._exception
def __str__(self):
"Create a string representation of the exception."
return self._msg
def __getitem__(self, ix):
"""Avoids weird error messages if someone does exception[ix] by
mistake, since Exception has __getitem__ defined."""
raise AttributeError("__getitem__")
# ===== SAXPARSEEXCEPTION =====
class SAXParseException(SAXException):
"""Encapsulate an XML parse error or warning.
This exception will include information for locating the error in
the original XML document. Note that although the application will
receive a SAXParseException as the argument to the handlers in the
ErrorHandler interface, the application is not actually required
to raise the exception; instead, it can simply read the
information in it and take a different action.
Since this exception is a subclass of SAXException, it inherits
the ability to wrap another exception."""
def __init__(self, msg, exception, locator):
"Creates the exception. The exception parameter is allowed to be None."
SAXException.__init__(self, msg, exception)
self._locator = locator
# We need to cache this stuff at construction time.
# If this exception is raised, the objects through which we must
# traverse to get this information may be deleted by the time
# it gets caught.
self._systemId = self._locator.getSystemId()
self._colnum = self._locator.getColumnNumber()
self._linenum = self._locator.getLineNumber()
def getColumnNumber(self):
"""The column number of the end of the text where the exception
occurred."""
return self._colnum
def getLineNumber(self):
"The line number of the end of the text where the exception occurred."
return self._linenum
def getPublicId(self):
"Get the public identifier of the entity where the exception occurred."
return self._locator.getPublicId()
def getSystemId(self):
"Get the system identifier of the entity where the exception occurred."
return self._systemId
def __str__(self):
"Create a string representation of the exception."
sysid = self.getSystemId()
if sysid is None:
sysid = "<unknown>"
linenum = self.getLineNumber()
if linenum is None:
linenum = "?"
colnum = self.getColumnNumber()
if colnum is None:
colnum = "?"
return "%s:%s:%s: %s" % (sysid, linenum, colnum, self._msg)
# ===== SAXNOTRECOGNIZEDEXCEPTION =====
class SAXNotRecognizedException(SAXException):
"""Exception class for an unrecognized identifier.
An XMLReader will raise this exception when it is confronted with an
unrecognized feature or property. SAX applications and extensions may
use this class for similar purposes."""
# ===== SAXNOTSUPPORTEDEXCEPTION =====
class SAXNotSupportedException(SAXException):
"""Exception class for an unsupported operation.
An XMLReader will raise this exception when a service it cannot
perform is requested (specifically setting a state or value). SAX
applications and extensions may use this class for similar
purposes."""
# ===== SAXNOTSUPPORTEDEXCEPTION =====
class SAXReaderNotAvailable(SAXNotSupportedException):
"""Exception class for a missing driver.
An XMLReader module (driver) should raise this exception when it
is first imported, e.g. when a support module cannot be imported.
It also may be raised during parsing, e.g. if executing an external
program is not permitted."""

446
Lib/xml/sax/expatreader.py Normal file
View File

@@ -0,0 +1,446 @@
"""
SAX driver for the pyexpat C module. This driver works with
pyexpat.__version__ == '2.22'.
"""
version = "0.20"
from xml.sax._exceptions import *
from xml.sax.handler import feature_validation, feature_namespaces
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
from xml.sax.handler import feature_string_interning
from xml.sax.handler import property_xml_string, property_interning_dict
# xml.parsers.expat does not raise ImportError in Jython
import sys
if sys.platform[:4] == "java":
raise SAXReaderNotAvailable("expat not available in Java", None)
del sys
try:
from xml.parsers import expat
except ImportError:
raise SAXReaderNotAvailable("expat not supported", None)
else:
if not hasattr(expat, "ParserCreate"):
raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl
# If we're using a sufficiently recent version of Python, we can use
# weak references to avoid cycles between the parser and content
# handler, otherwise we'll just have to pretend.
try:
import _weakref
except ImportError:
def _mkproxy(o):
return o
else:
import weakref
_mkproxy = weakref.proxy
del weakref, _weakref
class _ClosedParser:
pass
# --- ExpatLocator
class ExpatLocator(xmlreader.Locator):
"""Locator for use with the ExpatParser class.
This uses a weak reference to the parser object to avoid creating
a circular reference between the parser and the content handler.
"""
def __init__(self, parser):
self._ref = _mkproxy(parser)
def getColumnNumber(self):
parser = self._ref
if parser._parser is None:
return None
return parser._parser.ErrorColumnNumber
def getLineNumber(self):
parser = self._ref
if parser._parser is None:
return 1
return parser._parser.ErrorLineNumber
def getPublicId(self):
parser = self._ref
if parser is None:
return None
return parser._source.getPublicId()
def getSystemId(self):
parser = self._ref
if parser is None:
return None
return parser._source.getSystemId()
# --- ExpatParser
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
"""SAX driver for the pyexpat C module."""
def __init__(self, namespaceHandling=0, bufsize=2**16-20):
xmlreader.IncrementalParser.__init__(self, bufsize)
self._source = xmlreader.InputSource()
self._parser = None
self._namespaces = namespaceHandling
self._lex_handler_prop = None
self._parsing = 0
self._entity_stack = []
self._external_ges = 0
self._interning = None
# XMLReader methods
def parse(self, source):
"Parse an XML document from a URL or an InputSource."
source = saxutils.prepare_input_source(source)
self._source = source
try:
self.reset()
self._cont_handler.setDocumentLocator(ExpatLocator(self))
xmlreader.IncrementalParser.parse(self, source)
except:
# bpo-30264: Close the source on error to not leak resources:
# xml.sax.parse() doesn't give access to the underlying parser
# to the caller
self._close_source()
raise
def prepareParser(self, source):
if source.getSystemId() is not None:
self._parser.SetBase(source.getSystemId())
# Redefined setContentHandler to allow changing handlers during parsing
def setContentHandler(self, handler):
xmlreader.IncrementalParser.setContentHandler(self, handler)
if self._parsing:
self._reset_cont_handler()
def getFeature(self, name):
if name == feature_namespaces:
return self._namespaces
elif name == feature_string_interning:
return self._interning is not None
elif name in (feature_validation, feature_external_pes,
feature_namespace_prefixes):
return 0
elif name == feature_external_ges:
return self._external_ges
raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
def setFeature(self, name, state):
if self._parsing:
raise SAXNotSupportedException("Cannot set features while parsing")
if name == feature_namespaces:
self._namespaces = state
elif name == feature_external_ges:
self._external_ges = state
elif name == feature_string_interning:
if state:
if self._interning is None:
self._interning = {}
else:
self._interning = None
elif name == feature_validation:
if state:
raise SAXNotSupportedException(
"expat does not support validation")
elif name == feature_external_pes:
if state:
raise SAXNotSupportedException(
"expat does not read external parameter entities")
elif name == feature_namespace_prefixes:
if state:
raise SAXNotSupportedException(
"expat does not report namespace prefixes")
else:
raise SAXNotRecognizedException(
"Feature '%s' not recognized" % name)
def getProperty(self, name):
if name == handler.property_lexical_handler:
return self._lex_handler_prop
elif name == property_interning_dict:
return self._interning
elif name == property_xml_string:
if self._parser:
if hasattr(self._parser, "GetInputContext"):
return self._parser.GetInputContext()
else:
raise SAXNotRecognizedException(
"This version of expat does not support getting"
" the XML string")
else:
raise SAXNotSupportedException(
"XML string cannot be returned when not parsing")
raise SAXNotRecognizedException("Property '%s' not recognized" % name)
def setProperty(self, name, value):
if name == handler.property_lexical_handler:
self._lex_handler_prop = value
if self._parsing:
self._reset_lex_handler_prop()
elif name == property_interning_dict:
self._interning = value
elif name == property_xml_string:
raise SAXNotSupportedException("Property '%s' cannot be set" %
name)
else:
raise SAXNotRecognizedException("Property '%s' not recognized" %
name)
# IncrementalParser methods
def feed(self, data, isFinal = 0):
if not self._parsing:
self.reset()
self._parsing = 1
self._cont_handler.startDocument()
try:
# The isFinal parameter is internal to the expat reader.
# If it is set to true, expat will check validity of the entire
# document. When feeding chunks, they are not normally final -
# except when invoked from close.
self._parser.Parse(data, isFinal)
except expat.error as e:
exc = SAXParseException(expat.ErrorString(e.code), e, self)
# FIXME: when to invoke error()?
self._err_handler.fatalError(exc)
def _close_source(self):
source = self._source
try:
file = source.getCharacterStream()
if file is not None:
file.close()
finally:
file = source.getByteStream()
if file is not None:
file.close()
def close(self):
if (self._entity_stack or self._parser is None or
isinstance(self._parser, _ClosedParser)):
# If we are completing an external entity, do nothing here
return
try:
self.feed("", isFinal = 1)
self._cont_handler.endDocument()
self._parsing = 0
# break cycle created by expat handlers pointing to our methods
self._parser = None
finally:
self._parsing = 0
if self._parser is not None:
# Keep ErrorColumnNumber and ErrorLineNumber after closing.
parser = _ClosedParser()
parser.ErrorColumnNumber = self._parser.ErrorColumnNumber
parser.ErrorLineNumber = self._parser.ErrorLineNumber
self._parser = parser
self._close_source()
def _reset_cont_handler(self):
self._parser.ProcessingInstructionHandler = \
self._cont_handler.processingInstruction
self._parser.CharacterDataHandler = self._cont_handler.characters
def _reset_lex_handler_prop(self):
lex = self._lex_handler_prop
parser = self._parser
if lex is None:
parser.CommentHandler = None
parser.StartCdataSectionHandler = None
parser.EndCdataSectionHandler = None
parser.StartDoctypeDeclHandler = None
parser.EndDoctypeDeclHandler = None
else:
parser.CommentHandler = lex.comment
parser.StartCdataSectionHandler = lex.startCDATA
parser.EndCdataSectionHandler = lex.endCDATA
parser.StartDoctypeDeclHandler = self.start_doctype_decl
parser.EndDoctypeDeclHandler = lex.endDTD
def reset(self):
if self._namespaces:
self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
intern=self._interning)
self._parser.namespace_prefixes = 1
self._parser.StartElementHandler = self.start_element_ns
self._parser.EndElementHandler = self.end_element_ns
else:
self._parser = expat.ParserCreate(self._source.getEncoding(),
intern = self._interning)
self._parser.StartElementHandler = self.start_element
self._parser.EndElementHandler = self.end_element
self._reset_cont_handler()
self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
self._parser.NotationDeclHandler = self.notation_decl
self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
self._decl_handler_prop = None
if self._lex_handler_prop:
self._reset_lex_handler_prop()
# self._parser.DefaultHandler =
# self._parser.DefaultHandlerExpand =
# self._parser.NotStandaloneHandler =
self._parser.ExternalEntityRefHandler = self.external_entity_ref
try:
self._parser.SkippedEntityHandler = self.skipped_entity_handler
except AttributeError:
# This pyexpat does not support SkippedEntity
pass
self._parser.SetParamEntityParsing(
expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
self._parsing = 0
self._entity_stack = []
# Locator methods
def getColumnNumber(self):
if self._parser is None:
return None
return self._parser.ErrorColumnNumber
def getLineNumber(self):
if self._parser is None:
return 1
return self._parser.ErrorLineNumber
def getPublicId(self):
return self._source.getPublicId()
def getSystemId(self):
return self._source.getSystemId()
# event handlers
def start_element(self, name, attrs):
self._cont_handler.startElement(name, AttributesImpl(attrs))
def end_element(self, name):
self._cont_handler.endElement(name)
def start_element_ns(self, name, attrs):
pair = name.split()
if len(pair) == 1:
# no namespace
pair = (None, name)
elif len(pair) == 3:
pair = pair[0], pair[1]
else:
# default namespace
pair = tuple(pair)
newattrs = {}
qnames = {}
for (aname, value) in attrs.items():
parts = aname.split()
length = len(parts)
if length == 1:
# no namespace
qname = aname
apair = (None, aname)
elif length == 3:
qname = "%s:%s" % (parts[2], parts[1])
apair = parts[0], parts[1]
else:
# default namespace
qname = parts[1]
apair = tuple(parts)
newattrs[apair] = value
qnames[apair] = qname
self._cont_handler.startElementNS(pair, None,
AttributesNSImpl(newattrs, qnames))
def end_element_ns(self, name):
pair = name.split()
if len(pair) == 1:
pair = (None, name)
elif len(pair) == 3:
pair = pair[0], pair[1]
else:
pair = tuple(pair)
self._cont_handler.endElementNS(pair, None)
# this is not used (call directly to ContentHandler)
def processing_instruction(self, target, data):
self._cont_handler.processingInstruction(target, data)
# this is not used (call directly to ContentHandler)
def character_data(self, data):
self._cont_handler.characters(data)
def start_namespace_decl(self, prefix, uri):
self._cont_handler.startPrefixMapping(prefix, uri)
def end_namespace_decl(self, prefix):
self._cont_handler.endPrefixMapping(prefix)
def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
self._lex_handler_prop.startDTD(name, pubid, sysid)
def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)
def notation_decl(self, name, base, sysid, pubid):
self._dtd_handler.notationDecl(name, pubid, sysid)
def external_entity_ref(self, context, base, sysid, pubid):
if not self._external_ges:
return 1
source = self._ent_handler.resolveEntity(pubid, sysid)
source = saxutils.prepare_input_source(source,
self._source.getSystemId() or
"")
self._entity_stack.append((self._parser, self._source))
self._parser = self._parser.ExternalEntityParserCreate(context)
self._source = source
try:
xmlreader.IncrementalParser.parse(self, source)
except:
return 0 # FIXME: save error info here?
(self._parser, self._source) = self._entity_stack[-1]
del self._entity_stack[-1]
return 1
def skipped_entity_handler(self, name, is_pe):
if is_pe:
# The SAX spec requires to report skipped PEs with a '%'
name = '%'+name
self._cont_handler.skippedEntity(name)
# ---
def create_parser(*args, **kwargs):
return ExpatParser(*args, **kwargs)
# ---
if __name__ == "__main__":
import xml.sax.saxutils
p = create_parser()
p.setContentHandler(xml.sax.saxutils.XMLGenerator())
p.setErrorHandler(xml.sax.ErrorHandler())
p.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml")

342
Lib/xml/sax/handler.py Normal file
View File

@@ -0,0 +1,342 @@
"""
This module contains the core classes of version 2.0 of SAX for Python.
This file provides only default classes with absolutely minimum
functionality, from which drivers and applications can be subclassed.
Many of these classes are empty and are included only as documentation
of the interfaces.
$Id$
"""
version = '2.0beta'
#============================================================================
#
# HANDLER INTERFACES
#
#============================================================================
# ===== ERRORHANDLER =====
class ErrorHandler:
"""Basic interface for SAX error handlers.
If you create an object that implements this interface, then
register the object with your XMLReader, the parser will call the
methods in your object to report all warnings and errors. There
are three levels of errors available: warnings, (possibly)
recoverable errors, and unrecoverable errors. All methods take a
SAXParseException as the only parameter."""
def error(self, exception):
"Handle a recoverable error."
raise exception
def fatalError(self, exception):
"Handle a non-recoverable error."
raise exception
def warning(self, exception):
"Handle a warning."
print(exception)
# ===== CONTENTHANDLER =====
class ContentHandler:
"""Interface for receiving logical document content events.
This is the main callback interface in SAX, and the one most
important to applications. The order of events in this interface
mirrors the order of the information in the document."""
def __init__(self):
self._locator = None
def setDocumentLocator(self, locator):
"""Called by the parser to give the application a locator for
locating the origin of document events.
SAX parsers are strongly encouraged (though not absolutely
required) to supply a locator: if it does so, it must supply
the locator to the application by invoking this method before
invoking any of the other methods in the DocumentHandler
interface.
The locator allows the application to determine the end
position of any document-related event, even if the parser is
not reporting an error. Typically, the application will use
this information for reporting its own errors (such as
character content that does not match an application's
business rules). The information returned by the locator is
probably not sufficient for use with a search engine.
Note that the locator will return correct information only
during the invocation of the events in this interface. The
application should not attempt to use it at any other time."""
self._locator = locator
def startDocument(self):
"""Receive notification of the beginning of a document.
The SAX parser will invoke this method only once, before any
other methods in this interface or in DTDHandler (except for
setDocumentLocator)."""
def endDocument(self):
"""Receive notification of the end of a document.
The SAX parser will invoke this method only once, and it will
be the last method invoked during the parse. The parser shall
not invoke this method until it has either abandoned parsing
(because of an unrecoverable error) or reached the end of
input."""
def startPrefixMapping(self, prefix, uri):
"""Begin the scope of a prefix-URI Namespace mapping.
The information from this event is not necessary for normal
Namespace processing: the SAX XML reader will automatically
replace prefixes for element and attribute names when the
http://xml.org/sax/features/namespaces feature is true (the
default).
There are cases, however, when applications need to use
prefixes in character data or in attribute values, where they
cannot safely be expanded automatically; the
start/endPrefixMapping event supplies the information to the
application to expand prefixes in those contexts itself, if
necessary.
Note that start/endPrefixMapping events are not guaranteed to
be properly nested relative to each-other: all
startPrefixMapping events will occur before the corresponding
startElement event, and all endPrefixMapping events will occur
after the corresponding endElement event, but their order is
not guaranteed."""
def endPrefixMapping(self, prefix):
"""End the scope of a prefix-URI mapping.
See startPrefixMapping for details. This event will always
occur after the corresponding endElement event, but the order
of endPrefixMapping events is not otherwise guaranteed."""
def startElement(self, name, attrs):
"""Signals the start of an element in non-namespace mode.
The name parameter contains the raw XML 1.0 name of the
element type as a string and the attrs parameter holds an
instance of the Attributes class containing the attributes of
the element."""
def endElement(self, name):
"""Signals the end of an element in non-namespace mode.
The name parameter contains the name of the element type, just
as with the startElement event."""
def startElementNS(self, name, qname, attrs):
"""Signals the start of an element in namespace mode.
The name parameter contains the name of the element type as a
(uri, localname) tuple, the qname parameter the raw XML 1.0
name used in the source document, and the attrs parameter
holds an instance of the Attributes class containing the
attributes of the element.
The uri part of the name tuple is None for elements which have
no namespace."""
def endElementNS(self, name, qname):
"""Signals the end of an element in namespace mode.
The name parameter contains the name of the element type, just
as with the startElementNS event."""
def characters(self, content):
"""Receive notification of character data.
The Parser will call this method to report each chunk of
character data. SAX parsers may return all contiguous
character data in a single chunk, or they may split it into
several chunks; however, all of the characters in any single
event must come from the same external entity so that the
Locator provides useful information."""
def ignorableWhitespace(self, whitespace):
"""Receive notification of ignorable whitespace in element content.
Validating Parsers must use this method to report each chunk
of ignorable whitespace (see the W3C XML 1.0 recommendation,
section 2.10): non-validating parsers may also use this method
if they are capable of parsing and using content models.
SAX parsers may return all contiguous whitespace in a single
chunk, or they may split it into several chunks; however, all
of the characters in any single event must come from the same
external entity, so that the Locator provides useful
information."""
def processingInstruction(self, target, data):
"""Receive notification of a processing instruction.
The Parser will invoke this method once for each processing
instruction found: note that processing instructions may occur
before or after the main document element.
A SAX parser should never report an XML declaration (XML 1.0,
section 2.8) or a text declaration (XML 1.0, section 4.3.1)
using this method."""
def skippedEntity(self, name):
"""Receive notification of a skipped entity.
The Parser will invoke this method once for each entity
skipped. Non-validating processors may skip entities if they
have not seen the declarations (because, for example, the
entity was declared in an external DTD subset). All processors
may skip external entities, depending on the values of the
http://xml.org/sax/features/external-general-entities and the
http://xml.org/sax/features/external-parameter-entities
properties."""
# ===== DTDHandler =====
class DTDHandler:
"""Handle DTD events.
This interface specifies only those DTD events required for basic
parsing (unparsed entities and attributes)."""
def notationDecl(self, name, publicId, systemId):
"Handle a notation declaration event."
def unparsedEntityDecl(self, name, publicId, systemId, ndata):
"Handle an unparsed entity declaration event."
# ===== ENTITYRESOLVER =====
class EntityResolver:
"""Basic interface for resolving entities. If you create an object
implementing this interface, then register the object with your
Parser, the parser will call the method in your object to
resolve all external entities. Note that DefaultHandler implements
this interface with the default behaviour."""
def resolveEntity(self, publicId, systemId):
"""Resolve the system identifier of an entity and return either
the system identifier to read from as a string, or an InputSource
to read from."""
return systemId
#============================================================================
#
# CORE FEATURES
#
#============================================================================
feature_namespaces = "http://xml.org/sax/features/namespaces"
# true: Perform Namespace processing (default).
# false: Optionally do not perform Namespace processing
# (implies namespace-prefixes).
# access: (parsing) read-only; (not parsing) read/write
feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"
# true: Report the original prefixed names and attributes used for Namespace
# declarations.
# false: Do not report attributes used for Namespace declarations, and
# optionally do not report original prefixed names (default).
# access: (parsing) read-only; (not parsing) read/write
feature_string_interning = "http://xml.org/sax/features/string-interning"
# true: All element names, prefixes, attribute names, Namespace URIs, and
# local names are interned using the built-in intern function.
# false: Names are not necessarily interned, although they may be (default).
# access: (parsing) read-only; (not parsing) read/write
feature_validation = "http://xml.org/sax/features/validation"
# true: Report all validation errors (implies external-general-entities and
# external-parameter-entities).
# false: Do not report validation errors.
# access: (parsing) read-only; (not parsing) read/write
feature_external_ges = "http://xml.org/sax/features/external-general-entities"
# true: Include all external general (text) entities.
# false: Do not include external general entities.
# access: (parsing) read-only; (not parsing) read/write
feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"
# true: Include all external parameter entities, including the external
# DTD subset.
# false: Do not include any external parameter entities, even the external
# DTD subset.
# access: (parsing) read-only; (not parsing) read/write
all_features = [feature_namespaces,
feature_namespace_prefixes,
feature_string_interning,
feature_validation,
feature_external_ges,
feature_external_pes]
#============================================================================
#
# CORE PROPERTIES
#
#============================================================================
property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"
# data type: xml.sax.sax2lib.LexicalHandler
# description: An optional extension handler for lexical events like comments.
# access: read/write
property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"
# data type: xml.sax.sax2lib.DeclHandler
# description: An optional extension handler for DTD-related events other
# than notations and unparsed entities.
# access: read/write
property_dom_node = "http://xml.org/sax/properties/dom-node"
# data type: org.w3c.dom.Node
# description: When parsing, the current DOM node being visited if this is
# a DOM iterator; when not parsing, the root DOM node for
# iteration.
# access: (parsing) read-only; (not parsing) read/write
property_xml_string = "http://xml.org/sax/properties/xml-string"
# data type: String
# description: The literal string of characters that was the source for
# the current event.
# access: read-only
property_encoding = "http://www.python.org/sax/properties/encoding"
# data type: String
# description: The name of the encoding to assume for input data.
# access: write: set the encoding, e.g. established by a higher-level
# protocol. May change during parsing (e.g. after
# processing a META tag)
# read: return the current encoding (possibly established through
# auto-detection.
# initial value: UTF-8
#
property_interning_dict = "http://www.python.org/sax/properties/interning-dict"
# data type: Dictionary
# description: The dictionary used to intern common strings in the document
# access: write: Request that the parser uses a specific dictionary, to
# allow interning across different documents
# read: return the current interning dictionary, or None
#
all_properties = [property_lexical_handler,
property_dom_node,
property_declaration_handler,
property_xml_string,
property_encoding,
property_interning_dict]

368
Lib/xml/sax/saxutils.py Normal file
View File

@@ -0,0 +1,368 @@
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
import os, urllib.parse, urllib.request
import io
import codecs
from . import handler
from . import xmlreader
def __dict_replace(s, d):
"""Replace substrings of a string using a dictionary."""
for key, value in d.items():
s = s.replace(key, value)
return s
def escape(data, entities={}):
"""Escape &, <, and > in a string of data.
You can escape other strings of data by passing a dictionary as
the optional entities parameter. The keys and values must all be
strings; each key will be replaced with its corresponding value.
"""
# must do ampersand first
data = data.replace("&", "&amp;")
data = data.replace(">", "&gt;")
data = data.replace("<", "&lt;")
if entities:
data = __dict_replace(data, entities)
return data
def unescape(data, entities={}):
"""Unescape &amp;, &lt;, and &gt; in a string of data.
You can unescape other strings of data by passing a dictionary as
the optional entities parameter. The keys and values must all be
strings; each key will be replaced with its corresponding value.
"""
data = data.replace("&lt;", "<")
data = data.replace("&gt;", ">")
if entities:
data = __dict_replace(data, entities)
# must do ampersand last
return data.replace("&amp;", "&")
def quoteattr(data, entities={}):
"""Escape and quote an attribute value.
Escape &, <, and > in a string of data, then quote it for use as
an attribute value. The \" character will be escaped as well, if
necessary.
You can escape other strings of data by passing a dictionary as
the optional entities parameter. The keys and values must all be
strings; each key will be replaced with its corresponding value.
"""
entities = entities.copy()
entities.update({'\n': '&#10;', '\r': '&#13;', '\t':'&#9;'})
data = escape(data, entities)
if '"' in data:
if "'" in data:
data = '"%s"' % data.replace('"', "&quot;")
else:
data = "'%s'" % data
else:
data = '"%s"' % data
return data
def _gettextwriter(out, encoding):
if out is None:
import sys
return sys.stdout
if isinstance(out, io.TextIOBase):
# use a text writer as is
return out
if isinstance(out, (codecs.StreamWriter, codecs.StreamReaderWriter)):
# use a codecs stream writer as is
return out
# wrap a binary writer with TextIOWrapper
if isinstance(out, io.RawIOBase):
# Keep the original file open when the TextIOWrapper is
# destroyed
class _wrapper:
__class__ = out.__class__
def __getattr__(self, name):
return getattr(out, name)
buffer = _wrapper()
buffer.close = lambda: None
else:
# This is to handle passed objects that aren't in the
# IOBase hierarchy, but just have a write method
buffer = io.BufferedIOBase()
buffer.writable = lambda: True
buffer.write = out.write
try:
# TextIOWrapper uses this methods to determine
# if BOM (for UTF-16, etc) should be added
buffer.seekable = out.seekable
buffer.tell = out.tell
except AttributeError:
pass
return io.TextIOWrapper(buffer, encoding=encoding,
errors='xmlcharrefreplace',
newline='\n',
write_through=True)
class XMLGenerator(handler.ContentHandler):
def __init__(self, out=None, encoding="iso-8859-1", short_empty_elements=False):
handler.ContentHandler.__init__(self)
out = _gettextwriter(out, encoding)
self._write = out.write
self._flush = out.flush
self._ns_contexts = [{}] # contains uri -> prefix dicts
self._current_context = self._ns_contexts[-1]
self._undeclared_ns_maps = []
self._encoding = encoding
self._short_empty_elements = short_empty_elements
self._pending_start_element = False
def _qname(self, name):
"""Builds a qualified name from a (ns_url, localname) pair"""
if name[0]:
# Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is
# bound by definition to http://www.w3.org/XML/1998/namespace. It
# does not need to be declared and will not usually be found in
# self._current_context.
if 'http://www.w3.org/XML/1998/namespace' == name[0]:
return 'xml:' + name[1]
# The name is in a non-empty namespace
prefix = self._current_context[name[0]]
if prefix:
# If it is not the default namespace, prepend the prefix
return prefix + ":" + name[1]
# Return the unqualified name
return name[1]
def _finish_pending_start_element(self,endElement=False):
if self._pending_start_element:
self._write('>')
self._pending_start_element = False
# ContentHandler methods
def startDocument(self):
self._write('<?xml version="1.0" encoding="%s"?>\n' %
self._encoding)
def endDocument(self):
self._flush()
def startPrefixMapping(self, prefix, uri):
self._ns_contexts.append(self._current_context.copy())
self._current_context[uri] = prefix
self._undeclared_ns_maps.append((prefix, uri))
def endPrefixMapping(self, prefix):
self._current_context = self._ns_contexts[-1]
del self._ns_contexts[-1]
def startElement(self, name, attrs):
self._finish_pending_start_element()
self._write('<' + name)
for (name, value) in attrs.items():
self._write(' %s=%s' % (name, quoteattr(value)))
if self._short_empty_elements:
self._pending_start_element = True
else:
self._write(">")
def endElement(self, name):
if self._pending_start_element:
self._write('/>')
self._pending_start_element = False
else:
self._write('</%s>' % name)
def startElementNS(self, name, qname, attrs):
self._finish_pending_start_element()
self._write('<' + self._qname(name))
for prefix, uri in self._undeclared_ns_maps:
if prefix:
self._write(' xmlns:%s="%s"' % (prefix, uri))
else:
self._write(' xmlns="%s"' % uri)
self._undeclared_ns_maps = []
for (name, value) in attrs.items():
self._write(' %s=%s' % (self._qname(name), quoteattr(value)))
if self._short_empty_elements:
self._pending_start_element = True
else:
self._write(">")
def endElementNS(self, name, qname):
if self._pending_start_element:
self._write('/>')
self._pending_start_element = False
else:
self._write('</%s>' % self._qname(name))
def characters(self, content):
if content:
self._finish_pending_start_element()
if not isinstance(content, str):
content = str(content, self._encoding)
self._write(escape(content))
def ignorableWhitespace(self, content):
if content:
self._finish_pending_start_element()
if not isinstance(content, str):
content = str(content, self._encoding)
self._write(content)
def processingInstruction(self, target, data):
self._finish_pending_start_element()
self._write('<?%s %s?>' % (target, data))
class XMLFilterBase(xmlreader.XMLReader):
"""This class is designed to sit between an XMLReader and the
client application's event handlers. By default, it does nothing
but pass requests up to the reader and events on to the handlers
unmodified, but subclasses can override specific methods to modify
the event stream or the configuration requests as they pass
through."""
def __init__(self, parent = None):
xmlreader.XMLReader.__init__(self)
self._parent = parent
# ErrorHandler methods
def error(self, exception):
self._err_handler.error(exception)
def fatalError(self, exception):
self._err_handler.fatalError(exception)
def warning(self, exception):
self._err_handler.warning(exception)
# ContentHandler methods
def setDocumentLocator(self, locator):
self._cont_handler.setDocumentLocator(locator)
def startDocument(self):
self._cont_handler.startDocument()
def endDocument(self):
self._cont_handler.endDocument()
def startPrefixMapping(self, prefix, uri):
self._cont_handler.startPrefixMapping(prefix, uri)
def endPrefixMapping(self, prefix):
self._cont_handler.endPrefixMapping(prefix)
def startElement(self, name, attrs):
self._cont_handler.startElement(name, attrs)
def endElement(self, name):
self._cont_handler.endElement(name)
def startElementNS(self, name, qname, attrs):
self._cont_handler.startElementNS(name, qname, attrs)
def endElementNS(self, name, qname):
self._cont_handler.endElementNS(name, qname)
def characters(self, content):
self._cont_handler.characters(content)
def ignorableWhitespace(self, chars):
self._cont_handler.ignorableWhitespace(chars)
def processingInstruction(self, target, data):
self._cont_handler.processingInstruction(target, data)
def skippedEntity(self, name):
self._cont_handler.skippedEntity(name)
# DTDHandler methods
def notationDecl(self, name, publicId, systemId):
self._dtd_handler.notationDecl(name, publicId, systemId)
def unparsedEntityDecl(self, name, publicId, systemId, ndata):
self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)
# EntityResolver methods
def resolveEntity(self, publicId, systemId):
return self._ent_handler.resolveEntity(publicId, systemId)
# XMLReader methods
def parse(self, source):
self._parent.setContentHandler(self)
self._parent.setErrorHandler(self)
self._parent.setEntityResolver(self)
self._parent.setDTDHandler(self)
self._parent.parse(source)
def setLocale(self, locale):
self._parent.setLocale(locale)
def getFeature(self, name):
return self._parent.getFeature(name)
def setFeature(self, name, state):
self._parent.setFeature(name, state)
def getProperty(self, name):
return self._parent.getProperty(name)
def setProperty(self, name, value):
self._parent.setProperty(name, value)
# XMLFilter methods
def getParent(self):
return self._parent
def setParent(self, parent):
self._parent = parent
# --- Utility functions
def prepare_input_source(source, base=""):
"""This function takes an InputSource and an optional base URL and
returns a fully resolved InputSource object ready for reading."""
if isinstance(source, str):
source = xmlreader.InputSource(source)
elif hasattr(source, "read"):
f = source
source = xmlreader.InputSource()
if isinstance(f.read(0), str):
source.setCharacterStream(f)
else:
source.setByteStream(f)
if hasattr(f, "name") and isinstance(f.name, str):
source.setSystemId(f.name)
if source.getCharacterStream() is None and source.getByteStream() is None:
sysid = source.getSystemId()
basehead = os.path.dirname(os.path.normpath(base))
sysidfilename = os.path.join(basehead, sysid)
if os.path.isfile(sysidfilename):
source.setSystemId(sysidfilename)
f = open(sysidfilename, "rb")
else:
source.setSystemId(urllib.parse.urljoin(base, sysid))
f = urllib.request.urlopen(source.getSystemId())
source.setByteStream(f)
return source

380
Lib/xml/sax/xmlreader.py Normal file
View File

@@ -0,0 +1,380 @@
"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
should be based on this code. """
from . import handler
from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException
# ===== XMLREADER =====
class XMLReader:
"""Interface for reading an XML document using callbacks.
XMLReader is the interface that an XML parser's SAX2 driver must
implement. This interface allows an application to set and query
features and properties in the parser, to register event handlers
for document processing, and to initiate a document parse.
All SAX interfaces are assumed to be synchronous: the parse
methods must not return until parsing is complete, and readers
must wait for an event-handler callback to return before reporting
the next event."""
def __init__(self):
self._cont_handler = handler.ContentHandler()
self._dtd_handler = handler.DTDHandler()
self._ent_handler = handler.EntityResolver()
self._err_handler = handler.ErrorHandler()
def parse(self, source):
"Parse an XML document from a system identifier or an InputSource."
raise NotImplementedError("This method must be implemented!")
def getContentHandler(self):
"Returns the current ContentHandler."
return self._cont_handler
def setContentHandler(self, handler):
"Registers a new object to receive document content events."
self._cont_handler = handler
def getDTDHandler(self):
"Returns the current DTD handler."
return self._dtd_handler
def setDTDHandler(self, handler):
"Register an object to receive basic DTD-related events."
self._dtd_handler = handler
def getEntityResolver(self):
"Returns the current EntityResolver."
return self._ent_handler
def setEntityResolver(self, resolver):
"Register an object to resolve external entities."
self._ent_handler = resolver
def getErrorHandler(self):
"Returns the current ErrorHandler."
return self._err_handler
def setErrorHandler(self, handler):
"Register an object to receive error-message events."
self._err_handler = handler
def setLocale(self, locale):
"""Allow an application to set the locale for errors and warnings.
SAX parsers are not required to provide localization for errors
and warnings; if they cannot support the requested locale,
however, they must raise a SAX exception. Applications may
request a locale change in the middle of a parse."""
raise SAXNotSupportedException("Locale support not implemented")
def getFeature(self, name):
"Looks up and returns the state of a SAX2 feature."
raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
def setFeature(self, name, state):
"Sets the state of a SAX2 feature."
raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
def getProperty(self, name):
"Looks up and returns the value of a SAX2 property."
raise SAXNotRecognizedException("Property '%s' not recognized" % name)
def setProperty(self, name, value):
"Sets the value of a SAX2 property."
raise SAXNotRecognizedException("Property '%s' not recognized" % name)
class IncrementalParser(XMLReader):
"""This interface adds three extra methods to the XMLReader
interface that allow XML parsers to support incremental
parsing. Support for this interface is optional, since not all
underlying XML parsers support this functionality.
When the parser is instantiated it is ready to begin accepting
data from the feed method immediately. After parsing has been
finished with a call to close the reset method must be called to
make the parser ready to accept new data, either from feed or
using the parse method.
Note that these methods must _not_ be called during parsing, that
is, after parse has been called and before it returns.
By default, the class also implements the parse method of the XMLReader
interface using the feed, close and reset methods of the
IncrementalParser interface as a convenience to SAX 2.0 driver
writers."""
def __init__(self, bufsize=2**16):
self._bufsize = bufsize
XMLReader.__init__(self)
def parse(self, source):
from . import saxutils
source = saxutils.prepare_input_source(source)
self.prepareParser(source)
file = source.getCharacterStream()
if file is None:
file = source.getByteStream()
buffer = file.read(self._bufsize)
while buffer:
self.feed(buffer)
buffer = file.read(self._bufsize)
self.close()
def feed(self, data):
"""This method gives the raw XML data in the data parameter to
the parser and makes it parse the data, emitting the
corresponding events. It is allowed for XML constructs to be
split across several calls to feed.
feed may raise SAXException."""
raise NotImplementedError("This method must be implemented!")
def prepareParser(self, source):
"""This method is called by the parse implementation to allow
the SAX 2.0 driver to prepare itself for parsing."""
raise NotImplementedError("prepareParser must be overridden!")
def close(self):
"""This method is called when the entire XML document has been
passed to the parser through the feed method, to notify the
parser that there are no more data. This allows the parser to
do the final checks on the document and empty the internal
data buffer.
The parser will not be ready to parse another document until
the reset method has been called.
close may raise SAXException."""
raise NotImplementedError("This method must be implemented!")
def reset(self):
"""This method is called after close has been called to reset
the parser so that it is ready to parse new documents. The
results of calling parse or feed after close without calling
reset are undefined."""
raise NotImplementedError("This method must be implemented!")
# ===== LOCATOR =====
class Locator:
"""Interface for associating a SAX event with a document
location. A locator object will return valid results only during
calls to DocumentHandler methods; at any other time, the
results are unpredictable."""
def getColumnNumber(self):
"Return the column number where the current event ends."
return -1
def getLineNumber(self):
"Return the line number where the current event ends."
return -1
def getPublicId(self):
"Return the public identifier for the current event."
return None
def getSystemId(self):
"Return the system identifier for the current event."
return None
# ===== INPUTSOURCE =====
class InputSource:
"""Encapsulation of the information needed by the XMLReader to
read entities.
This class may include information about the public identifier,
system identifier, byte stream (possibly with character encoding
information) and/or the character stream of an entity.
Applications will create objects of this class for use in the
XMLReader.parse method and for returning from
EntityResolver.resolveEntity.
An InputSource belongs to the application, the XMLReader is not
allowed to modify InputSource objects passed to it from the
application, although it may make copies and modify those."""
def __init__(self, system_id = None):
self.__system_id = system_id
self.__public_id = None
self.__encoding = None
self.__bytefile = None
self.__charfile = None
def setPublicId(self, public_id):
"Sets the public identifier of this InputSource."
self.__public_id = public_id
def getPublicId(self):
"Returns the public identifier of this InputSource."
return self.__public_id
def setSystemId(self, system_id):
"Sets the system identifier of this InputSource."
self.__system_id = system_id
def getSystemId(self):
"Returns the system identifier of this InputSource."
return self.__system_id
def setEncoding(self, encoding):
"""Sets the character encoding of this InputSource.
The encoding must be a string acceptable for an XML encoding
declaration (see section 4.3.3 of the XML recommendation).
The encoding attribute of the InputSource is ignored if the
InputSource also contains a character stream."""
self.__encoding = encoding
def getEncoding(self):
"Get the character encoding of this InputSource."
return self.__encoding
def setByteStream(self, bytefile):
"""Set the byte stream (a Python file-like object which does
not perform byte-to-character conversion) for this input
source.
The SAX parser will ignore this if there is also a character
stream specified, but it will use a byte stream in preference
to opening a URI connection itself.
If the application knows the character encoding of the byte
stream, it should set it with the setEncoding method."""
self.__bytefile = bytefile
def getByteStream(self):
"""Get the byte stream for this input source.
The getEncoding method will return the character encoding for
this byte stream, or None if unknown."""
return self.__bytefile
def setCharacterStream(self, charfile):
"""Set the character stream for this input source. (The stream
must be a Python 2.0 Unicode-wrapped file-like that performs
conversion to Unicode strings.)
If there is a character stream specified, the SAX parser will
ignore any byte stream and will not attempt to open a URI
connection to the system identifier."""
self.__charfile = charfile
def getCharacterStream(self):
"Get the character stream for this input source."
return self.__charfile
# ===== ATTRIBUTESIMPL =====
class AttributesImpl:
def __init__(self, attrs):
"""Non-NS-aware implementation.
attrs should be of the form {name : value}."""
self._attrs = attrs
def getLength(self):
return len(self._attrs)
def getType(self, name):
return "CDATA"
def getValue(self, name):
return self._attrs[name]
def getValueByQName(self, name):
return self._attrs[name]
def getNameByQName(self, name):
if name not in self._attrs:
raise KeyError(name)
return name
def getQNameByName(self, name):
if name not in self._attrs:
raise KeyError(name)
return name
def getNames(self):
return list(self._attrs.keys())
def getQNames(self):
return list(self._attrs.keys())
def __len__(self):
return len(self._attrs)
def __getitem__(self, name):
return self._attrs[name]
def keys(self):
return list(self._attrs.keys())
def __contains__(self, name):
return name in self._attrs
def get(self, name, alternative=None):
return self._attrs.get(name, alternative)
def copy(self):
return self.__class__(self._attrs)
def items(self):
return list(self._attrs.items())
def values(self):
return list(self._attrs.values())
# ===== ATTRIBUTESNSIMPL =====
class AttributesNSImpl(AttributesImpl):
def __init__(self, attrs, qnames):
"""NS-aware implementation.
attrs should be of the form {(ns_uri, lname): value, ...}.
qnames of the form {(ns_uri, lname): qname, ...}."""
self._attrs = attrs
self._qnames = qnames
def getValueByQName(self, name):
for (nsname, qname) in self._qnames.items():
if qname == name:
return self._attrs[nsname]
raise KeyError(name)
def getNameByQName(self, name):
for (nsname, qname) in self._qnames.items():
if qname == name:
return nsname
raise KeyError(name)
def getQNameByName(self, name):
return self._qnames[name]
def getQNames(self):
return list(self._qnames.values())
def copy(self):
return self.__class__(self._attrs, self._qnames)
def _test():
XMLReader()
IncrementalParser()
Locator()
if __name__ == "__main__":
_test()