Merge pull request #1643 from RustPython/coolreader18/http

Add the http module
This commit is contained in:
Jeong YunWon
2019-12-24 16:41:08 +09:00
committed by GitHub
28 changed files with 16769 additions and 17 deletions

713
Lib/calendar.py vendored Normal file
View File

@@ -0,0 +1,713 @@
"""Calendar printing functions
Note when comparing these calendars to the ones printed by cal(1): By
default, these calendars have Monday as the first day of the week, and
Sunday as the last (the European convention). Use setfirstweekday() to
set the first day of the week (0=Monday, 6=Sunday)."""
import sys
import datetime
import locale as _locale
from itertools import repeat
__all__ = ["IllegalMonthError", "IllegalWeekdayError", "setfirstweekday",
"firstweekday", "isleap", "leapdays", "weekday", "monthrange",
"monthcalendar", "prmonth", "month", "prcal", "calendar",
"timegm", "month_name", "month_abbr", "day_name", "day_abbr",
"Calendar", "TextCalendar", "HTMLCalendar", "LocaleTextCalendar",
"LocaleHTMLCalendar", "weekheader"]
# Exception raised for bad input (with string parameter for details)
error = ValueError
# Exceptions raised for bad input
class IllegalMonthError(ValueError):
def __init__(self, month):
self.month = month
def __str__(self):
return "bad month number %r; must be 1-12" % self.month
class IllegalWeekdayError(ValueError):
def __init__(self, weekday):
self.weekday = weekday
def __str__(self):
return "bad weekday number %r; must be 0 (Monday) to 6 (Sunday)" % self.weekday
# Constants for months referenced later
January = 1
February = 2
# Number of days per month (except for February in leap years)
mdays = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
# This module used to have hard-coded lists of day and month names, as
# English strings. The classes following emulate a read-only version of
# that, but supply localized names. Note that the values are computed
# fresh on each call, in case the user changes locale between calls.
class _localized_month:
_months = [datetime.date(2001, i+1, 1).strftime for i in range(12)]
_months.insert(0, lambda x: "")
def __init__(self, format):
self.format = format
def __getitem__(self, i):
funcs = self._months[i]
if isinstance(i, slice):
return [f(self.format) for f in funcs]
else:
return funcs(self.format)
def __len__(self):
return 13
class _localized_day:
# January 1, 2001, was a Monday.
_days = [datetime.date(2001, 1, i+1).strftime for i in range(7)]
def __init__(self, format):
self.format = format
def __getitem__(self, i):
funcs = self._days[i]
if isinstance(i, slice):
return [f(self.format) for f in funcs]
else:
return funcs(self.format)
def __len__(self):
return 7
# Full and abbreviated names of weekdays
day_name = _localized_day('%A')
day_abbr = _localized_day('%a')
# Full and abbreviated names of months (1-based arrays!!!)
month_name = _localized_month('%B')
month_abbr = _localized_month('%b')
# Constants for weekdays
(MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY) = range(7)
def isleap(year):
"""Return True for leap years, False for non-leap years."""
return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
def leapdays(y1, y2):
"""Return number of leap years in range [y1, y2).
Assume y1 <= y2."""
y1 -= 1
y2 -= 1
return (y2//4 - y1//4) - (y2//100 - y1//100) + (y2//400 - y1//400)
def weekday(year, month, day):
"""Return weekday (0-6 ~ Mon-Sun) for year (1970-...), month (1-12),
day (1-31)."""
return datetime.date(year, month, day).weekday()
def monthrange(year, month):
"""Return weekday (0-6 ~ Mon-Sun) and number of days (28-31) for
year, month."""
if not 1 <= month <= 12:
raise IllegalMonthError(month)
day1 = weekday(year, month, 1)
ndays = mdays[month] + (month == February and isleap(year))
return day1, ndays
class Calendar(object):
"""
Base calendar class. This class doesn't do any formatting. It simply
provides data to subclasses.
"""
def __init__(self, firstweekday=0):
self.firstweekday = firstweekday # 0 = Monday, 6 = Sunday
def getfirstweekday(self):
return self._firstweekday % 7
def setfirstweekday(self, firstweekday):
self._firstweekday = firstweekday
firstweekday = property(getfirstweekday, setfirstweekday)
def iterweekdays(self):
"""
Return an iterator for one week of weekday numbers starting with the
configured first one.
"""
for i in range(self.firstweekday, self.firstweekday + 7):
yield i%7
def itermonthdates(self, year, month):
"""
Return an iterator for one month. The iterator will yield datetime.date
values and will always iterate through complete weeks, so it will yield
dates outside the specified month.
"""
date = datetime.date(year, month, 1)
# Go back to the beginning of the week
days = (date.weekday() - self.firstweekday) % 7
date -= datetime.timedelta(days=days)
oneday = datetime.timedelta(days=1)
while True:
yield date
try:
date += oneday
except OverflowError:
# Adding one day could fail after datetime.MAXYEAR
break
if date.month != month and date.weekday() == self.firstweekday:
break
def itermonthdays2(self, year, month):
"""
Like itermonthdates(), but will yield (day number, weekday number)
tuples. For days outside the specified month the day number is 0.
"""
for i, d in enumerate(self.itermonthdays(year, month), self.firstweekday):
yield d, i % 7
def itermonthdays(self, year, month):
"""
Like itermonthdates(), but will yield day numbers. For days outside
the specified month the day number is 0.
"""
day1, ndays = monthrange(year, month)
days_before = (day1 - self.firstweekday) % 7
yield from repeat(0, days_before)
yield from range(1, ndays + 1)
days_after = (self.firstweekday - day1 - ndays) % 7
yield from repeat(0, days_after)
def monthdatescalendar(self, year, month):
"""
Return a matrix (list of lists) representing a month's calendar.
Each row represents a week; week entries are datetime.date values.
"""
dates = list(self.itermonthdates(year, month))
return [ dates[i:i+7] for i in range(0, len(dates), 7) ]
def monthdays2calendar(self, year, month):
"""
Return a matrix representing a month's calendar.
Each row represents a week; week entries are
(day number, weekday number) tuples. Day numbers outside this month
are zero.
"""
days = list(self.itermonthdays2(year, month))
return [ days[i:i+7] for i in range(0, len(days), 7) ]
def monthdayscalendar(self, year, month):
"""
Return a matrix representing a month's calendar.
Each row represents a week; days outside this month are zero.
"""
days = list(self.itermonthdays(year, month))
return [ days[i:i+7] for i in range(0, len(days), 7) ]
def yeardatescalendar(self, year, width=3):
"""
Return the data for the specified year ready for formatting. The return
value is a list of month rows. Each month row contains up to width months.
Each month contains between 4 and 6 weeks and each week contains 1-7
days. Days are datetime.date objects.
"""
months = [
self.monthdatescalendar(year, i)
for i in range(January, January+12)
]
return [months[i:i+width] for i in range(0, len(months), width) ]
def yeardays2calendar(self, year, width=3):
"""
Return the data for the specified year ready for formatting (similar to
yeardatescalendar()). Entries in the week lists are
(day number, weekday number) tuples. Day numbers outside this month are
zero.
"""
months = [
self.monthdays2calendar(year, i)
for i in range(January, January+12)
]
return [months[i:i+width] for i in range(0, len(months), width) ]
def yeardayscalendar(self, year, width=3):
"""
Return the data for the specified year ready for formatting (similar to
yeardatescalendar()). Entries in the week lists are day numbers.
Day numbers outside this month are zero.
"""
months = [
self.monthdayscalendar(year, i)
for i in range(January, January+12)
]
return [months[i:i+width] for i in range(0, len(months), width) ]
class TextCalendar(Calendar):
"""
Subclass of Calendar that outputs a calendar as a simple plain text
similar to the UNIX program cal.
"""
def prweek(self, theweek, width):
"""
Print a single week (no newline).
"""
print(self.formatweek(theweek, width), end=' ')
def formatday(self, day, weekday, width):
"""
Returns a formatted day.
"""
if day == 0:
s = ''
else:
s = '%2i' % day # right-align single-digit days
return s.center(width)
def formatweek(self, theweek, width):
"""
Returns a single week in a string (no newline).
"""
return ' '.join(self.formatday(d, wd, width) for (d, wd) in theweek)
def formatweekday(self, day, width):
"""
Returns a formatted week day name.
"""
if width >= 9:
names = day_name
else:
names = day_abbr
return names[day][:width].center(width)
def formatweekheader(self, width):
"""
Return a header for a week.
"""
return ' '.join(self.formatweekday(i, width) for i in self.iterweekdays())
def formatmonthname(self, theyear, themonth, width, withyear=True):
"""
Return a formatted month name.
"""
s = month_name[themonth]
if withyear:
s = "%s %r" % (s, theyear)
return s.center(width)
def prmonth(self, theyear, themonth, w=0, l=0):
"""
Print a month's calendar.
"""
print(self.formatmonth(theyear, themonth, w, l), end='')
def formatmonth(self, theyear, themonth, w=0, l=0):
"""
Return a month's calendar string (multi-line).
"""
w = max(2, w)
l = max(1, l)
s = self.formatmonthname(theyear, themonth, 7 * (w + 1) - 1)
s = s.rstrip()
s += '\n' * l
s += self.formatweekheader(w).rstrip()
s += '\n' * l
for week in self.monthdays2calendar(theyear, themonth):
s += self.formatweek(week, w).rstrip()
s += '\n' * l
return s
def formatyear(self, theyear, w=2, l=1, c=6, m=3):
"""
Returns a year's calendar as a multi-line string.
"""
w = max(2, w)
l = max(1, l)
c = max(2, c)
colwidth = (w + 1) * 7 - 1
v = []
a = v.append
a(repr(theyear).center(colwidth*m+c*(m-1)).rstrip())
a('\n'*l)
header = self.formatweekheader(w)
for (i, row) in enumerate(self.yeardays2calendar(theyear, m)):
# months in this row
months = range(m*i+1, min(m*(i+1)+1, 13))
a('\n'*l)
names = (self.formatmonthname(theyear, k, colwidth, False)
for k in months)
a(formatstring(names, colwidth, c).rstrip())
a('\n'*l)
headers = (header for k in months)
a(formatstring(headers, colwidth, c).rstrip())
a('\n'*l)
# max number of weeks for this row
height = max(len(cal) for cal in row)
for j in range(height):
weeks = []
for cal in row:
if j >= len(cal):
weeks.append('')
else:
weeks.append(self.formatweek(cal[j], w))
a(formatstring(weeks, colwidth, c).rstrip())
a('\n' * l)
return ''.join(v)
def pryear(self, theyear, w=0, l=0, c=6, m=3):
"""Print a year's calendar."""
print(self.formatyear(theyear, w, l, c, m))
class HTMLCalendar(Calendar):
"""
This calendar returns complete HTML pages.
"""
# CSS classes for the day <td>s
cssclasses = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"]
def formatday(self, day, weekday):
"""
Return a day as a table cell.
"""
if day == 0:
return '<td class="noday">&nbsp;</td>' # day outside month
else:
return '<td class="%s">%d</td>' % (self.cssclasses[weekday], day)
def formatweek(self, theweek):
"""
Return a complete week as a table row.
"""
s = ''.join(self.formatday(d, wd) for (d, wd) in theweek)
return '<tr>%s</tr>' % s
def formatweekday(self, day):
"""
Return a weekday name as a table header.
"""
return '<th class="%s">%s</th>' % (self.cssclasses[day], day_abbr[day])
def formatweekheader(self):
"""
Return a header for a week as a table row.
"""
s = ''.join(self.formatweekday(i) for i in self.iterweekdays())
return '<tr>%s</tr>' % s
def formatmonthname(self, theyear, themonth, withyear=True):
"""
Return a month name as a table row.
"""
if withyear:
s = '%s %s' % (month_name[themonth], theyear)
else:
s = '%s' % month_name[themonth]
return '<tr><th colspan="7" class="month">%s</th></tr>' % s
def formatmonth(self, theyear, themonth, withyear=True):
"""
Return a formatted month as a table.
"""
v = []
a = v.append
a('<table border="0" cellpadding="0" cellspacing="0" class="month">')
a('\n')
a(self.formatmonthname(theyear, themonth, withyear=withyear))
a('\n')
a(self.formatweekheader())
a('\n')
for week in self.monthdays2calendar(theyear, themonth):
a(self.formatweek(week))
a('\n')
a('</table>')
a('\n')
return ''.join(v)
def formatyear(self, theyear, width=3):
"""
Return a formatted year as a table of tables.
"""
v = []
a = v.append
width = max(width, 1)
a('<table border="0" cellpadding="0" cellspacing="0" class="year">')
a('\n')
a('<tr><th colspan="%d" class="year">%s</th></tr>' % (width, theyear))
for i in range(January, January+12, width):
# months in this row
months = range(i, min(i+width, 13))
a('<tr>')
for m in months:
a('<td>')
a(self.formatmonth(theyear, m, withyear=False))
a('</td>')
a('</tr>')
a('</table>')
return ''.join(v)
def formatyearpage(self, theyear, width=3, css='calendar.css', encoding=None):
"""
Return a formatted year as a complete HTML page.
"""
if encoding is None:
encoding = sys.getdefaultencoding()
v = []
a = v.append
a('<?xml version="1.0" encoding="%s"?>\n' % encoding)
a('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n')
a('<html>\n')
a('<head>\n')
a('<meta http-equiv="Content-Type" content="text/html; charset=%s" />\n' % encoding)
if css is not None:
a('<link rel="stylesheet" type="text/css" href="%s" />\n' % css)
a('<title>Calendar for %d</title>\n' % theyear)
a('</head>\n')
a('<body>\n')
a(self.formatyear(theyear, width))
a('</body>\n')
a('</html>\n')
return ''.join(v).encode(encoding, "xmlcharrefreplace")
class different_locale:
def __init__(self, locale):
self.locale = locale
def __enter__(self):
self.oldlocale = _locale.getlocale(_locale.LC_TIME)
_locale.setlocale(_locale.LC_TIME, self.locale)
def __exit__(self, *args):
_locale.setlocale(_locale.LC_TIME, self.oldlocale)
class LocaleTextCalendar(TextCalendar):
"""
This class can be passed a locale name in the constructor and will return
month and weekday names in the specified locale. If this locale includes
an encoding all strings containing month and weekday names will be returned
as unicode.
"""
def __init__(self, firstweekday=0, locale=None):
TextCalendar.__init__(self, firstweekday)
if locale is None:
locale = _locale.getdefaultlocale()
self.locale = locale
def formatweekday(self, day, width):
with different_locale(self.locale):
if width >= 9:
names = day_name
else:
names = day_abbr
name = names[day]
return name[:width].center(width)
def formatmonthname(self, theyear, themonth, width, withyear=True):
with different_locale(self.locale):
s = month_name[themonth]
if withyear:
s = "%s %r" % (s, theyear)
return s.center(width)
class LocaleHTMLCalendar(HTMLCalendar):
"""
This class can be passed a locale name in the constructor and will return
month and weekday names in the specified locale. If this locale includes
an encoding all strings containing month and weekday names will be returned
as unicode.
"""
def __init__(self, firstweekday=0, locale=None):
HTMLCalendar.__init__(self, firstweekday)
if locale is None:
locale = _locale.getdefaultlocale()
self.locale = locale
def formatweekday(self, day):
with different_locale(self.locale):
s = day_abbr[day]
return '<th class="%s">%s</th>' % (self.cssclasses[day], s)
def formatmonthname(self, theyear, themonth, withyear=True):
with different_locale(self.locale):
s = month_name[themonth]
if withyear:
s = '%s %s' % (s, theyear)
return '<tr><th colspan="7" class="month">%s</th></tr>' % s
# Support for old module level interface
c = TextCalendar()
firstweekday = c.getfirstweekday
def setfirstweekday(firstweekday):
if not MONDAY <= firstweekday <= SUNDAY:
raise IllegalWeekdayError(firstweekday)
c.firstweekday = firstweekday
monthcalendar = c.monthdayscalendar
prweek = c.prweek
week = c.formatweek
weekheader = c.formatweekheader
prmonth = c.prmonth
month = c.formatmonth
calendar = c.formatyear
prcal = c.pryear
# Spacing of month columns for multi-column year calendar
_colwidth = 7*3 - 1 # Amount printed by prweek()
_spacing = 6 # Number of spaces between columns
def format(cols, colwidth=_colwidth, spacing=_spacing):
"""Prints multi-column formatting for year calendars"""
print(formatstring(cols, colwidth, spacing))
def formatstring(cols, colwidth=_colwidth, spacing=_spacing):
"""Returns a string formatted from n strings, centered within n columns."""
spacing *= ' '
return spacing.join(c.center(colwidth) for c in cols)
EPOCH = 1970
_EPOCH_ORD = datetime.date(EPOCH, 1, 1).toordinal()
def timegm(tuple):
"""Unrelated but handy function to calculate Unix timestamp from GMT."""
year, month, day, hour, minute, second = tuple[:6]
days = datetime.date(year, month, 1).toordinal() - _EPOCH_ORD + day - 1
hours = days*24 + hour
minutes = hours*60 + minute
seconds = minutes*60 + second
return seconds
def main(args):
import argparse
parser = argparse.ArgumentParser()
textgroup = parser.add_argument_group('text only arguments')
htmlgroup = parser.add_argument_group('html only arguments')
textgroup.add_argument(
"-w", "--width",
type=int, default=2,
help="width of date column (default 2)"
)
textgroup.add_argument(
"-l", "--lines",
type=int, default=1,
help="number of lines for each week (default 1)"
)
textgroup.add_argument(
"-s", "--spacing",
type=int, default=6,
help="spacing between months (default 6)"
)
textgroup.add_argument(
"-m", "--months",
type=int, default=3,
help="months per row (default 3)"
)
htmlgroup.add_argument(
"-c", "--css",
default="calendar.css",
help="CSS to use for page"
)
parser.add_argument(
"-L", "--locale",
default=None,
help="locale to be used from month and weekday names"
)
parser.add_argument(
"-e", "--encoding",
default=None,
help="encoding to use for output"
)
parser.add_argument(
"-t", "--type",
default="text",
choices=("text", "html"),
help="output type (text or html)"
)
parser.add_argument(
"year",
nargs='?', type=int,
help="year number (1-9999)"
)
parser.add_argument(
"month",
nargs='?', type=int,
help="month number (1-12, text only)"
)
options = parser.parse_args(args[1:])
if options.locale and not options.encoding:
parser.error("if --locale is specified --encoding is required")
sys.exit(1)
locale = options.locale, options.encoding
if options.type == "html":
if options.locale:
cal = LocaleHTMLCalendar(locale=locale)
else:
cal = HTMLCalendar()
encoding = options.encoding
if encoding is None:
encoding = sys.getdefaultencoding()
optdict = dict(encoding=encoding, css=options.css)
write = sys.stdout.buffer.write
if options.year is None:
write(cal.formatyearpage(datetime.date.today().year, **optdict))
elif options.month is None:
write(cal.formatyearpage(options.year, **optdict))
else:
parser.error("incorrect number of arguments")
sys.exit(1)
else:
if options.locale:
cal = LocaleTextCalendar(locale=locale)
else:
cal = TextCalendar()
optdict = dict(w=options.width, l=options.lines)
if options.month is None:
optdict["c"] = options.spacing
optdict["m"] = options.months
if options.year is None:
result = cal.formatyear(datetime.date.today().year, **optdict)
elif options.month is None:
result = cal.formatyear(options.year, **optdict)
else:
result = cal.formatmonth(options.year, options.month, **optdict)
write = sys.stdout.write
if options.encoding:
result = result.encode(options.encoding)
write = sys.stdout.buffer.write
write(result)
if __name__ == "__main__":
main(sys.argv)

314
Lib/code.py vendored Normal file
View File

@@ -0,0 +1,314 @@
"""Utilities needed to emulate Python's interactive interpreter.
"""
# Inspired by similar code by Jeff Epler and Fredrik Lundh.
import sys
import traceback
import argparse
from codeop import CommandCompiler, compile_command
__all__ = ["InteractiveInterpreter", "InteractiveConsole", "interact",
"compile_command"]
class InteractiveInterpreter:
"""Base class for InteractiveConsole.
This class deals with parsing and interpreter state (the user's
namespace); it doesn't deal with input buffering or prompting or
input file naming (the filename is always passed in explicitly).
"""
def __init__(self, locals=None):
"""Constructor.
The optional 'locals' argument specifies the dictionary in
which code will be executed; it defaults to a newly created
dictionary with key "__name__" set to "__console__" and key
"__doc__" set to None.
"""
if locals is None:
locals = {"__name__": "__console__", "__doc__": None}
self.locals = locals
self.compile = CommandCompiler()
def runsource(self, source, filename="<input>", symbol="single"):
"""Compile and run some source in the interpreter.
Arguments are as for compile_command().
One several things can happen:
1) The input is incorrect; compile_command() raised an
exception (SyntaxError or OverflowError). A syntax traceback
will be printed by calling the showsyntaxerror() method.
2) The input is incomplete, and more input is required;
compile_command() returned None. Nothing happens.
3) The input is complete; compile_command() returned a code
object. The code is executed by calling self.runcode() (which
also handles run-time exceptions, except for SystemExit).
The return value is True in case 2, False in the other cases (unless
an exception is raised). The return value can be used to
decide whether to use sys.ps1 or sys.ps2 to prompt the next
line.
"""
try:
code = self.compile(source, filename, symbol)
except (OverflowError, SyntaxError, ValueError):
# Case 1
self.showsyntaxerror(filename)
return False
if code is None:
# Case 2
return True
# Case 3
self.runcode(code)
return False
def runcode(self, code):
"""Execute a code object.
When an exception occurs, self.showtraceback() is called to
display a traceback. All exceptions are caught except
SystemExit, which is reraised.
A note about KeyboardInterrupt: this exception may occur
elsewhere in this code, and may not always be caught. The
caller should be prepared to deal with it.
"""
try:
exec(code, self.locals)
except SystemExit:
raise
except:
self.showtraceback()
def showsyntaxerror(self, filename=None):
"""Display the syntax error that just occurred.
This doesn't display a stack trace because there isn't one.
If a filename is given, it is stuffed in the exception instead
of what was there before (because Python's parser always uses
"<string>" when reading from a string).
The output is written by self.write(), below.
"""
type, value, tb = sys.exc_info()
sys.last_type = type
sys.last_value = value
sys.last_traceback = tb
if filename and type is SyntaxError:
# Work hard to stuff the correct filename in the exception
try:
msg, (dummy_filename, lineno, offset, line) = value.args
except ValueError:
# Not the format we expect; leave it alone
pass
else:
# Stuff in the right filename
value = SyntaxError(msg, (filename, lineno, offset, line))
sys.last_value = value
if sys.excepthook is sys.__excepthook__:
lines = traceback.format_exception_only(type, value)
self.write(''.join(lines))
else:
# If someone has set sys.excepthook, we let that take precedence
# over self.write
sys.excepthook(type, value, tb)
def showtraceback(self):
"""Display the exception that just occurred.
We remove the first stack item because it is our own code.
The output is written by self.write(), below.
"""
sys.last_type, sys.last_value, last_tb = ei = sys.exc_info()
sys.last_traceback = last_tb
try:
lines = traceback.format_exception(ei[0], ei[1], last_tb.tb_next)
if sys.excepthook is sys.__excepthook__:
self.write(''.join(lines))
else:
# If someone has set sys.excepthook, we let that take precedence
# over self.write
sys.excepthook(ei[0], ei[1], last_tb)
finally:
last_tb = ei = None
def write(self, data):
"""Write a string.
The base implementation writes to sys.stderr; a subclass may
replace this with a different implementation.
"""
sys.stderr.write(data)
class InteractiveConsole(InteractiveInterpreter):
"""Closely emulate the behavior of the interactive Python interpreter.
This class builds on InteractiveInterpreter and adds prompting
using the familiar sys.ps1 and sys.ps2, and input buffering.
"""
def __init__(self, locals=None, filename="<console>"):
"""Constructor.
The optional locals argument will be passed to the
InteractiveInterpreter base class.
The optional filename argument should specify the (file)name
of the input stream; it will show up in tracebacks.
"""
InteractiveInterpreter.__init__(self, locals)
self.filename = filename
self.resetbuffer()
def resetbuffer(self):
"""Reset the input buffer."""
self.buffer = []
def interact(self, banner=None, exitmsg=None):
"""Closely emulate the interactive Python console.
The optional banner argument specifies the banner to print
before the first interaction; by default it prints a banner
similar to the one printed by the real Python interpreter,
followed by the current class name in parentheses (so as not
to confuse this with the real interpreter -- since it's so
close!).
The optional exitmsg argument specifies the exit message
printed when exiting. Pass the empty string to suppress
printing an exit message. If exitmsg is not given or None,
a default message is printed.
"""
try:
sys.ps1
except AttributeError:
sys.ps1 = ">>> "
try:
sys.ps2
except AttributeError:
sys.ps2 = "... "
cprt = 'Type "help", "copyright", "credits" or "license" for more information.'
if banner is None:
self.write("Python %s on %s\n%s\n(%s)\n" %
(sys.version, sys.platform, cprt,
self.__class__.__name__))
elif banner:
self.write("%s\n" % str(banner))
more = 0
while 1:
try:
if more:
prompt = sys.ps2
else:
prompt = sys.ps1
try:
line = self.raw_input(prompt)
except EOFError:
self.write("\n")
break
else:
more = self.push(line)
except KeyboardInterrupt:
self.write("\nKeyboardInterrupt\n")
self.resetbuffer()
more = 0
if exitmsg is None:
self.write('now exiting %s...\n' % self.__class__.__name__)
elif exitmsg != '':
self.write('%s\n' % exitmsg)
def push(self, line):
"""Push a line to the interpreter.
The line should not have a trailing newline; it may have
internal newlines. The line is appended to a buffer and the
interpreter's runsource() method is called with the
concatenated contents of the buffer as source. If this
indicates that the command was executed or invalid, the buffer
is reset; otherwise, the command is incomplete, and the buffer
is left as it was after the line was appended. The return
value is 1 if more input is required, 0 if the line was dealt
with in some way (this is the same as runsource()).
"""
self.buffer.append(line)
source = "\n".join(self.buffer)
more = self.runsource(source, self.filename)
if not more:
self.resetbuffer()
return more
def raw_input(self, prompt=""):
"""Write a prompt and read a line.
The returned line does not include the trailing newline.
When the user enters the EOF key sequence, EOFError is raised.
The base implementation uses the built-in function
input(); a subclass may replace this with a different
implementation.
"""
return input(prompt)
def interact(banner=None, readfunc=None, local=None, exitmsg=None):
"""Closely emulate the interactive Python interpreter.
This is a backwards compatible interface to the InteractiveConsole
class. When readfunc is not specified, it attempts to import the
readline module to enable GNU readline if it is available.
Arguments (all optional, all default to None):
banner -- passed to InteractiveConsole.interact()
readfunc -- if not None, replaces InteractiveConsole.raw_input()
local -- passed to InteractiveInterpreter.__init__()
exitmsg -- passed to InteractiveConsole.interact()
"""
console = InteractiveConsole(local)
if readfunc is not None:
console.raw_input = readfunc
else:
try:
import readline
except ImportError:
pass
console.interact(banner, exitmsg)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-q', action='store_true',
help="don't print version and copyright messages")
args = parser.parse_args()
if args.q or sys.flags.quiet:
banner = ''
else:
banner = None
interact(banner)

186
Lib/getpass.py vendored Normal file
View File

@@ -0,0 +1,186 @@
"""Utilities to get a password and/or the current user name.
getpass(prompt[, stream]) - Prompt for a password, with echo turned off.
getuser() - Get the user name from the environment or password database.
GetPassWarning - This UserWarning is issued when getpass() cannot prevent
echoing of the password contents while reading.
On Windows, the msvcrt module will be used.
On the Mac EasyDialogs.AskPassword is used, if available.
"""
# Authors: Piers Lauder (original)
# Guido van Rossum (Windows support and cleanup)
# Gregory P. Smith (tty support & GetPassWarning)
import contextlib
import io
import os
import sys
import warnings
__all__ = ["getpass","getuser","GetPassWarning"]
class GetPassWarning(UserWarning): pass
def unix_getpass(prompt='Password: ', stream=None):
"""Prompt for a password, with echo turned off.
Args:
prompt: Written on stream to ask for the input. Default: 'Password: '
stream: A writable file object to display the prompt. Defaults to
the tty. If no tty is available defaults to sys.stderr.
Returns:
The seKr3t input.
Raises:
EOFError: If our input tty or stdin was closed.
GetPassWarning: When we were unable to turn echo off on the input.
Always restores terminal settings before returning.
"""
passwd = None
with contextlib.ExitStack() as stack:
try:
# Always try reading and writing directly on the tty first.
fd = os.open('/dev/tty', os.O_RDWR|os.O_NOCTTY)
tty = io.FileIO(fd, 'w+')
stack.enter_context(tty)
input = io.TextIOWrapper(tty)
stack.enter_context(input)
if not stream:
stream = input
except OSError as e:
# If that fails, see if stdin can be controlled.
stack.close()
try:
fd = sys.stdin.fileno()
except (AttributeError, ValueError):
fd = None
passwd = fallback_getpass(prompt, stream)
input = sys.stdin
if not stream:
stream = sys.stderr
if fd is not None:
try:
old = termios.tcgetattr(fd) # a copy to save
new = old[:]
new[3] &= ~termios.ECHO # 3 == 'lflags'
tcsetattr_flags = termios.TCSAFLUSH
if hasattr(termios, 'TCSASOFT'):
tcsetattr_flags |= termios.TCSASOFT
try:
termios.tcsetattr(fd, tcsetattr_flags, new)
passwd = _raw_input(prompt, stream, input=input)
finally:
termios.tcsetattr(fd, tcsetattr_flags, old)
stream.flush() # issue7208
except termios.error:
if passwd is not None:
# _raw_input succeeded. The final tcsetattr failed. Reraise
# instead of leaving the terminal in an unknown state.
raise
# We can't control the tty or stdin. Give up and use normal IO.
# fallback_getpass() raises an appropriate warning.
if stream is not input:
# clean up unused file objects before blocking
stack.close()
passwd = fallback_getpass(prompt, stream)
stream.write('\n')
return passwd
def win_getpass(prompt='Password: ', stream=None):
"""Prompt for password with echo off, using Windows getch()."""
if sys.stdin is not sys.__stdin__:
return fallback_getpass(prompt, stream)
for c in prompt:
msvcrt.putwch(c)
pw = ""
while 1:
c = msvcrt.getwch()
if c == '\r' or c == '\n':
break
if c == '\003':
raise KeyboardInterrupt
if c == '\b':
pw = pw[:-1]
else:
pw = pw + c
msvcrt.putwch('\r')
msvcrt.putwch('\n')
return pw
def fallback_getpass(prompt='Password: ', stream=None):
warnings.warn("Can not control echo on the terminal.", GetPassWarning,
stacklevel=2)
if not stream:
stream = sys.stderr
print("Warning: Password input may be echoed.", file=stream)
return _raw_input(prompt, stream)
def _raw_input(prompt="", stream=None, input=None):
# This doesn't save the string in the GNU readline history.
if not stream:
stream = sys.stderr
if not input:
input = sys.stdin
prompt = str(prompt)
if prompt:
try:
stream.write(prompt)
except UnicodeEncodeError:
# Use replace error handler to get as much as possible printed.
prompt = prompt.encode(stream.encoding, 'replace')
prompt = prompt.decode(stream.encoding)
stream.write(prompt)
stream.flush()
# NOTE: The Python C API calls flockfile() (and unlock) during readline.
line = input.readline()
if not line:
raise EOFError
if line[-1] == '\n':
line = line[:-1]
return line
def getuser():
"""Get the username from the environment or password database.
First try various environment variables, then the password
database. This works on Windows as long as USERNAME is set.
"""
for name in ('LOGNAME', 'USER', 'LNAME', 'USERNAME'):
user = os.environ.get(name)
if user:
return user
# If this fails, the exception will "explain" why
import pwd
return pwd.getpwuid(os.getuid())[0]
# Bind the name getpass to the appropriate function
try:
import termios
# it's possible there is an incompatible termios from the
# McMillan Installer, make sure we have a UNIX-compatible termios
termios.tcgetattr, termios.tcsetattr
except (ImportError, AttributeError):
try:
import msvcrt
except ImportError:
getpass = fallback_getpass
else:
getpass = win_getpass
else:
getpass = unix_getpass

145
Lib/hmac.py vendored Normal file
View File

@@ -0,0 +1,145 @@
"""HMAC (Keyed-Hashing for Message Authentication) Python module.
Implements the HMAC algorithm as described by RFC 2104.
"""
import warnings as _warnings
# XXX RustPython TODO: _operator
#from _operator import _compare_digest as compare_digest
import hashlib as _hashlib
trans_5C = bytes((x ^ 0x5C) for x in range(256))
trans_36 = bytes((x ^ 0x36) for x in range(256))
# The size of the digests returned by HMAC depends on the underlying
# hashing module used. Use digest_size from the instance of HMAC instead.
digest_size = None
class HMAC:
"""RFC 2104 HMAC class. Also complies with RFC 4231.
This supports the API for Cryptographic Hash Functions (PEP 247).
"""
blocksize = 64 # 512-bit HMAC; can be changed in subclasses.
def __init__(self, key, msg = None, digestmod = None):
"""Create a new HMAC object.
key: key for the keyed hash object.
msg: Initial input for the hash, if provided.
digestmod: A module supporting PEP 247. *OR*
A hashlib constructor returning a new hash object. *OR*
A hash name suitable for hashlib.new().
Defaults to hashlib.md5.
Implicit default to hashlib.md5 is deprecated and will be
removed in Python 3.6.
Note: key and msg must be a bytes or bytearray objects.
"""
if not isinstance(key, (bytes, bytearray)):
raise TypeError("key: expected bytes or bytearray, but got %r" % type(key).__name__)
if digestmod is None:
_warnings.warn("HMAC() without an explicit digestmod argument "
"is deprecated.", PendingDeprecationWarning, 2)
digestmod = _hashlib.md5
if callable(digestmod):
self.digest_cons = digestmod
elif isinstance(digestmod, str):
self.digest_cons = lambda d=b'': _hashlib.new(digestmod, d)
else:
self.digest_cons = lambda d=b'': digestmod.new(d)
self.outer = self.digest_cons()
self.inner = self.digest_cons()
self.digest_size = self.inner.digest_size
if hasattr(self.inner, 'block_size'):
blocksize = self.inner.block_size
if blocksize < 16:
_warnings.warn('block_size of %d seems too small; using our '
'default of %d.' % (blocksize, self.blocksize),
RuntimeWarning, 2)
blocksize = self.blocksize
else:
_warnings.warn('No block_size attribute on given digest object; '
'Assuming %d.' % (self.blocksize),
RuntimeWarning, 2)
blocksize = self.blocksize
# self.blocksize is the default blocksize. self.block_size is
# effective block size as well as the public API attribute.
self.block_size = blocksize
if len(key) > blocksize:
key = self.digest_cons(key).digest()
key = key.ljust(blocksize, b'\0')
self.outer.update(key.translate(trans_5C))
self.inner.update(key.translate(trans_36))
if msg is not None:
self.update(msg)
@property
def name(self):
return "hmac-" + self.inner.name
def update(self, msg):
"""Update this hashing object with the string msg.
"""
self.inner.update(msg)
def copy(self):
"""Return a separate copy of this hashing object.
An update to this copy won't affect the original object.
"""
# Call __new__ directly to avoid the expensive __init__.
other = self.__class__.__new__(self.__class__)
other.digest_cons = self.digest_cons
other.digest_size = self.digest_size
other.inner = self.inner.copy()
other.outer = self.outer.copy()
return other
def _current(self):
"""Return a hash object for the current state.
To be used only internally with digest() and hexdigest().
"""
h = self.outer.copy()
h.update(self.inner.digest())
return h
def digest(self):
"""Return the hash value of this hashing object.
This returns a string containing 8-bit data. The object is
not altered in any way by this function; you can continue
updating the object after calling this function.
"""
h = self._current()
return h.digest()
def hexdigest(self):
"""Like digest(), but returns a string of hexadecimal digits instead.
"""
h = self._current()
return h.hexdigest()
def new(key, msg = None, digestmod = None):
"""Create a new hashing object and return it.
key: The starting key for the hash.
msg: if available, will immediately be hashed into the object's starting
state.
You can now feed arbitrary strings into the object using its update()
method, and can ask for the hash value at any time by calling its digest()
method.
"""
return HMAC(key, msg, digestmod)

132
Lib/html/__init__.py Normal file
View File

@@ -0,0 +1,132 @@
"""
General functions for HTML manipulation.
"""
import re as _re
from html.entities import html5 as _html5
__all__ = ['escape', 'unescape']
def escape(s, quote=True):
"""
Replace special characters "&", "<" and ">" to HTML-safe sequences.
If the optional flag quote is true (the default), the quotation mark
characters, both double quote (") and single quote (') characters are also
translated.
"""
s = s.replace("&", "&amp;") # Must be done first!
s = s.replace("<", "&lt;")
s = s.replace(">", "&gt;")
if quote:
s = s.replace('"', "&quot;")
s = s.replace('\'', "&#x27;")
return s
# see http://www.w3.org/TR/html5/syntax.html#tokenizing-character-references
_invalid_charrefs = {
0x00: '\ufffd', # REPLACEMENT CHARACTER
0x0d: '\r', # CARRIAGE RETURN
0x80: '\u20ac', # EURO SIGN
0x81: '\x81', # <control>
0x82: '\u201a', # SINGLE LOW-9 QUOTATION MARK
0x83: '\u0192', # LATIN SMALL LETTER F WITH HOOK
0x84: '\u201e', # DOUBLE LOW-9 QUOTATION MARK
0x85: '\u2026', # HORIZONTAL ELLIPSIS
0x86: '\u2020', # DAGGER
0x87: '\u2021', # DOUBLE DAGGER
0x88: '\u02c6', # MODIFIER LETTER CIRCUMFLEX ACCENT
0x89: '\u2030', # PER MILLE SIGN
0x8a: '\u0160', # LATIN CAPITAL LETTER S WITH CARON
0x8b: '\u2039', # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0x8c: '\u0152', # LATIN CAPITAL LIGATURE OE
0x8d: '\x8d', # <control>
0x8e: '\u017d', # LATIN CAPITAL LETTER Z WITH CARON
0x8f: '\x8f', # <control>
0x90: '\x90', # <control>
0x91: '\u2018', # LEFT SINGLE QUOTATION MARK
0x92: '\u2019', # RIGHT SINGLE QUOTATION MARK
0x93: '\u201c', # LEFT DOUBLE QUOTATION MARK
0x94: '\u201d', # RIGHT DOUBLE QUOTATION MARK
0x95: '\u2022', # BULLET
0x96: '\u2013', # EN DASH
0x97: '\u2014', # EM DASH
0x98: '\u02dc', # SMALL TILDE
0x99: '\u2122', # TRADE MARK SIGN
0x9a: '\u0161', # LATIN SMALL LETTER S WITH CARON
0x9b: '\u203a', # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0x9c: '\u0153', # LATIN SMALL LIGATURE OE
0x9d: '\x9d', # <control>
0x9e: '\u017e', # LATIN SMALL LETTER Z WITH CARON
0x9f: '\u0178', # LATIN CAPITAL LETTER Y WITH DIAERESIS
}
_invalid_codepoints = {
# 0x0001 to 0x0008
0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
# 0x000E to 0x001F
0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
# 0x007F to 0x009F
0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a,
0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
# 0xFDD0 to 0xFDEF
0xfdd0, 0xfdd1, 0xfdd2, 0xfdd3, 0xfdd4, 0xfdd5, 0xfdd6, 0xfdd7, 0xfdd8,
0xfdd9, 0xfdda, 0xfddb, 0xfddc, 0xfddd, 0xfdde, 0xfddf, 0xfde0, 0xfde1,
0xfde2, 0xfde3, 0xfde4, 0xfde5, 0xfde6, 0xfde7, 0xfde8, 0xfde9, 0xfdea,
0xfdeb, 0xfdec, 0xfded, 0xfdee, 0xfdef,
# others
0xb, 0xfffe, 0xffff, 0x1fffe, 0x1ffff, 0x2fffe, 0x2ffff, 0x3fffe, 0x3ffff,
0x4fffe, 0x4ffff, 0x5fffe, 0x5ffff, 0x6fffe, 0x6ffff, 0x7fffe, 0x7ffff,
0x8fffe, 0x8ffff, 0x9fffe, 0x9ffff, 0xafffe, 0xaffff, 0xbfffe, 0xbffff,
0xcfffe, 0xcffff, 0xdfffe, 0xdffff, 0xefffe, 0xeffff, 0xffffe, 0xfffff,
0x10fffe, 0x10ffff
}
def _replace_charref(s):
s = s.group(1)
if s[0] == '#':
# numeric charref
if s[1] in 'xX':
num = int(s[2:].rstrip(';'), 16)
else:
num = int(s[1:].rstrip(';'))
if num in _invalid_charrefs:
return _invalid_charrefs[num]
if 0xD800 <= num <= 0xDFFF or num > 0x10FFFF:
return '\uFFFD'
if num in _invalid_codepoints:
return ''
return chr(num)
else:
# named charref
if s in _html5:
return _html5[s]
# find the longest matching name (as defined by the standard)
for x in range(len(s)-1, 1, -1):
if s[:x] in _html5:
return _html5[s[:x]] + s[x:]
else:
return '&' + s
_charref = _re.compile(r'&(#[0-9]+;?'
r'|#[xX][0-9a-fA-F]+;?'
r'|[^\t\n\f <&#;]{1,32};?)')
def unescape(s):
"""
Convert all named and numeric character references (e.g. &gt;, &#62;,
&x3e;) in the string s to the corresponding unicode characters.
This function uses the rules defined by the HTML 5 standard
for both valid and invalid character references, and the list of
HTML 5 named character references defined in html.entities.html5.
"""
if '&' not in s:
return s
return _charref.sub(_replace_charref, s)

2509
Lib/html/entities.py Normal file

File diff suppressed because it is too large Load Diff

470
Lib/html/parser.py Normal file
View File

@@ -0,0 +1,470 @@
"""A parser for HTML and XHTML."""
# This file is based on sgmllib.py, but the API is slightly different.
# XXX There should be a way to distinguish between PCDATA (parsed
# character data -- the normal case), RCDATA (replaceable character
# data -- only char and entity references and end tags are special)
# and CDATA (character data -- only end tags are special).
import re
import warnings
import _markupbase
from html import unescape
__all__ = ['HTMLParser']
# Regular expressions used for parsing
interesting_normal = re.compile('[&<]')
incomplete = re.compile('&[a-zA-Z#]')
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
starttagopen = re.compile('<[a-zA-Z]')
piclose = re.compile('>')
commentclose = re.compile(r'--\s*>')
# Note:
# 1) if you change tagfind/attrfind remember to update locatestarttagend too;
# 2) if you change tagfind/attrfind and/or locatestarttagend the parser will
# explode, so don't do it.
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*')
attrfind_tolerant = re.compile(
r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
locatestarttagend_tolerant = re.compile(r"""
<[a-zA-Z][^\t\n\r\f />\x00]* # tag name
(?:[\s/]* # optional whitespace before attribute name
(?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
(?:\s*=+\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|"[^"]*" # LIT-enclosed value
|(?!['"])[^>\s]* # bare value
)
(?:\s*,)* # possibly followed by a comma
)?(?:\s|/(?!>))*
)*
)?
\s* # trailing whitespace
""", re.VERBOSE)
endendtag = re.compile('>')
# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
# </ and the tag name, so maybe this should be fixed
endtagfind = re.compile(r'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
class HTMLParser(_markupbase.ParserBase):
"""Find tags and other markup and call handler functions.
Usage:
p = HTMLParser()
p.feed(data)
...
p.close()
Start tags are handled by calling self.handle_starttag() or
self.handle_startendtag(); end tags by self.handle_endtag(). The
data between tags is passed from the parser to the derived class
by calling self.handle_data() with the data as argument (the data
may be split up in arbitrary chunks). If convert_charrefs is
True the character references are converted automatically to the
corresponding Unicode character (and self.handle_data() is no
longer split in chunks), otherwise they are passed by calling
self.handle_entityref() or self.handle_charref() with the string
containing respectively the named or numeric reference as the
argument.
"""
CDATA_CONTENT_ELEMENTS = ("script", "style")
def __init__(self, *, convert_charrefs=True):
"""Initialize and reset this instance.
If convert_charrefs is True (the default), all character references
are automatically converted to the corresponding Unicode characters.
"""
self.convert_charrefs = convert_charrefs
self.reset()
def reset(self):
"""Reset this instance. Loses all unprocessed data."""
self.rawdata = ''
self.lasttag = '???'
self.interesting = interesting_normal
self.cdata_elem = None
_markupbase.ParserBase.reset(self)
def feed(self, data):
r"""Feed data to the parser.
Call this as often as you want, with as little or as much text
as you want (may include '\n').
"""
self.rawdata = self.rawdata + data
self.goahead(0)
def close(self):
"""Handle any buffered data."""
self.goahead(1)
__starttag_text = None
def get_starttag_text(self):
"""Return full source of start tag: '<...>'."""
return self.__starttag_text
def set_cdata_mode(self, elem):
self.cdata_elem = elem.lower()
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
def clear_cdata_mode(self):
self.interesting = interesting_normal
self.cdata_elem = None
# Internal -- handle data as far as reasonable. May leave state
# and data to be processed by a subsequent call. If 'end' is
# true, force handling all data as if followed by EOF marker.
def goahead(self, end):
rawdata = self.rawdata
i = 0
n = len(rawdata)
while i < n:
if self.convert_charrefs and not self.cdata_elem:
j = rawdata.find('<', i)
if j < 0:
# if we can't find the next <, either we are at the end
# or there's more text incoming. If the latter is True,
# we can't pass the text to handle_data in case we have
# a charref cut in half at end. Try to determine if
# this is the case before proceeding by looking for an
# & near the end and see if it's followed by a space or ;.
amppos = rawdata.rfind('&', max(i, n-34))
if (amppos >= 0 and
not re.compile(r'[\s;]').search(rawdata, amppos)):
break # wait till we get all the text
j = n
else:
match = self.interesting.search(rawdata, i) # < or &
if match:
j = match.start()
else:
if self.cdata_elem:
break
j = n
if i < j:
if self.convert_charrefs and not self.cdata_elem:
self.handle_data(unescape(rawdata[i:j]))
else:
self.handle_data(rawdata[i:j])
i = self.updatepos(i, j)
if i == n: break
startswith = rawdata.startswith
if startswith('<', i):
if starttagopen.match(rawdata, i): # < + letter
k = self.parse_starttag(i)
elif startswith("</", i):
k = self.parse_endtag(i)
elif startswith("<!--", i):
k = self.parse_comment(i)
elif startswith("<?", i):
k = self.parse_pi(i)
elif startswith("<!", i):
k = self.parse_html_declaration(i)
elif (i + 1) < n:
self.handle_data("<")
k = i + 1
else:
break
if k < 0:
if not end:
break
k = rawdata.find('>', i + 1)
if k < 0:
k = rawdata.find('<', i + 1)
if k < 0:
k = i + 1
else:
k += 1
if self.convert_charrefs and not self.cdata_elem:
self.handle_data(unescape(rawdata[i:k]))
else:
self.handle_data(rawdata[i:k])
i = self.updatepos(i, k)
elif startswith("&#", i):
match = charref.match(rawdata, i)
if match:
name = match.group()[2:-1]
self.handle_charref(name)
k = match.end()
if not startswith(';', k-1):
k = k - 1
i = self.updatepos(i, k)
continue
else:
if ";" in rawdata[i:]: # bail by consuming &#
self.handle_data(rawdata[i:i+2])
i = self.updatepos(i, i+2)
break
elif startswith('&', i):
match = entityref.match(rawdata, i)
if match:
name = match.group(1)
self.handle_entityref(name)
k = match.end()
if not startswith(';', k-1):
k = k - 1
i = self.updatepos(i, k)
continue
match = incomplete.match(rawdata, i)
if match:
# match.group() will contain at least 2 chars
if end and match.group() == rawdata[i:]:
k = match.end()
if k <= i:
k = n
i = self.updatepos(i, i + 1)
# incomplete
break
elif (i + 1) < n:
# not the end of the buffer, and can't be confused
# with some other construct
self.handle_data("&")
i = self.updatepos(i, i + 1)
else:
break
else:
assert 0, "interesting.search() lied"
# end while
if end and i < n and not self.cdata_elem:
if self.convert_charrefs and not self.cdata_elem:
self.handle_data(unescape(rawdata[i:n]))
else:
self.handle_data(rawdata[i:n])
i = self.updatepos(i, n)
self.rawdata = rawdata[i:]
# Internal -- parse html declarations, return length or -1 if not terminated
# See w3.org/TR/html5/tokenization.html#markup-declaration-open-state
# See also parse_declaration in _markupbase
def parse_html_declaration(self, i):
rawdata = self.rawdata
assert rawdata[i:i+2] == '<!', ('unexpected call to '
'parse_html_declaration()')
if rawdata[i:i+4] == '<!--':
# this case is actually already handled in goahead()
return self.parse_comment(i)
elif rawdata[i:i+3] == '<![':
return self.parse_marked_section(i)
elif rawdata[i:i+9].lower() == '<!doctype':
# find the closing >
gtpos = rawdata.find('>', i+9)
if gtpos == -1:
return -1
self.handle_decl(rawdata[i+2:gtpos])
return gtpos+1
else:
return self.parse_bogus_comment(i)
# Internal -- parse bogus comment, return length or -1 if not terminated
# see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
def parse_bogus_comment(self, i, report=1):
rawdata = self.rawdata
assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to '
'parse_comment()')
pos = rawdata.find('>', i+2)
if pos == -1:
return -1
if report:
self.handle_comment(rawdata[i+2:pos])
return pos + 1
# Internal -- parse processing instr, return end or -1 if not terminated
def parse_pi(self, i):
rawdata = self.rawdata
assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
match = piclose.search(rawdata, i+2) # >
if not match:
return -1
j = match.start()
self.handle_pi(rawdata[i+2: j])
j = match.end()
return j
# Internal -- handle starttag, return end or -1 if not terminated
def parse_starttag(self, i):
self.__starttag_text = None
endpos = self.check_for_whole_start_tag(i)
if endpos < 0:
return endpos
rawdata = self.rawdata
self.__starttag_text = rawdata[i:endpos]
# Now parse the data between i+1 and j into a tag and attrs
attrs = []
match = tagfind_tolerant.match(rawdata, i+1)
assert match, 'unexpected call to parse_starttag()'
k = match.end()
self.lasttag = tag = match.group(1).lower()
while k < endpos:
m = attrfind_tolerant.match(rawdata, k)
if not m:
break
attrname, rest, attrvalue = m.group(1, 2, 3)
if not rest:
attrvalue = None
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
attrvalue[:1] == '"' == attrvalue[-1:]:
attrvalue = attrvalue[1:-1]
if attrvalue:
attrvalue = unescape(attrvalue)
attrs.append((attrname.lower(), attrvalue))
k = m.end()
end = rawdata[k:endpos].strip()
if end not in (">", "/>"):
lineno, offset = self.getpos()
if "\n" in self.__starttag_text:
lineno = lineno + self.__starttag_text.count("\n")
offset = len(self.__starttag_text) \
- self.__starttag_text.rfind("\n")
else:
offset = offset + len(self.__starttag_text)
self.handle_data(rawdata[i:endpos])
return endpos
if end.endswith('/>'):
# XHTML-style empty tag: <span attr="value" />
self.handle_startendtag(tag, attrs)
else:
self.handle_starttag(tag, attrs)
if tag in self.CDATA_CONTENT_ELEMENTS:
self.set_cdata_mode(tag)
return endpos
# Internal -- check to see if we have a complete starttag; return end
# or -1 if incomplete.
def check_for_whole_start_tag(self, i):
rawdata = self.rawdata
m = locatestarttagend_tolerant.match(rawdata, i)
if m:
j = m.end()
next = rawdata[j:j+1]
if next == ">":
return j + 1
if next == "/":
if rawdata.startswith("/>", j):
return j + 2
if rawdata.startswith("/", j):
# buffer boundary
return -1
# else bogus input
if j > i:
return j
else:
return i + 1
if next == "":
# end of input
return -1
if next in ("abcdefghijklmnopqrstuvwxyz=/"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
# end of input in or before attribute value, or we have the
# '/' from a '/>' ending
return -1
if j > i:
return j
else:
return i + 1
raise AssertionError("we should not get here!")
# Internal -- parse endtag, return end or -1 if incomplete
def parse_endtag(self, i):
rawdata = self.rawdata
assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
match = endendtag.search(rawdata, i+1) # >
if not match:
return -1
gtpos = match.end()
match = endtagfind.match(rawdata, i) # </ + tag + >
if not match:
if self.cdata_elem is not None:
self.handle_data(rawdata[i:gtpos])
return gtpos
# find the name: w3.org/TR/html5/tokenization.html#tag-name-state
namematch = tagfind_tolerant.match(rawdata, i+2)
if not namematch:
# w3.org/TR/html5/tokenization.html#end-tag-open-state
if rawdata[i:i+3] == '</>':
return i+3
else:
return self.parse_bogus_comment(i)
tagname = namematch.group(1).lower()
# consume and ignore other stuff between the name and the >
# Note: this is not 100% correct, since we might have things like
# </tag attr=">">, but looking for > after tha name should cover
# most of the cases and is much simpler
gtpos = rawdata.find('>', namematch.end())
self.handle_endtag(tagname)
return gtpos+1
elem = match.group(1).lower() # script or style
if self.cdata_elem is not None:
if elem != self.cdata_elem:
self.handle_data(rawdata[i:gtpos])
return gtpos
self.handle_endtag(elem.lower())
self.clear_cdata_mode()
return gtpos
# Overridable -- finish processing of start+end tag: <tag.../>
def handle_startendtag(self, tag, attrs):
self.handle_starttag(tag, attrs)
self.handle_endtag(tag)
# Overridable -- handle start tag
def handle_starttag(self, tag, attrs):
pass
# Overridable -- handle end tag
def handle_endtag(self, tag):
pass
# Overridable -- handle character reference
def handle_charref(self, name):
pass
# Overridable -- handle entity reference
def handle_entityref(self, name):
pass
# Overridable -- handle data
def handle_data(self, data):
pass
# Overridable -- handle comment
def handle_comment(self, data):
pass
# Overridable -- handle declaration
def handle_decl(self, decl):
pass
# Overridable -- handle processing instruction
def handle_pi(self, data):
pass
def unknown_decl(self, data):
pass
# Internal -- helper to remove special character quoting
def unescape(self, s):
warnings.warn('The unescape method is deprecated and will be removed '
'in 3.5, use html.unescape() instead.',
DeprecationWarning, stacklevel=2)
return unescape(s)

134
Lib/http/__init__.py Normal file
View File

@@ -0,0 +1,134 @@
from enum import IntEnum
__all__ = ['HTTPStatus']
class HTTPStatus(IntEnum):
"""HTTP status codes and reason phrases
Status codes from the following RFCs are all observed:
* RFC 7231: Hypertext Transfer Protocol (HTTP/1.1), obsoletes 2616
* RFC 6585: Additional HTTP Status Codes
* RFC 3229: Delta encoding in HTTP
* RFC 4918: HTTP Extensions for WebDAV, obsoletes 2518
* RFC 5842: Binding Extensions to WebDAV
* RFC 7238: Permanent Redirect
* RFC 2295: Transparent Content Negotiation in HTTP
* RFC 2774: An HTTP Extension Framework
"""
def __new__(cls, value, phrase, description=''):
obj = int.__new__(cls, value)
obj._value_ = value
obj.phrase = phrase
obj.description = description
return obj
# informational
CONTINUE = 100, 'Continue', 'Request received, please continue'
SWITCHING_PROTOCOLS = (101, 'Switching Protocols',
'Switching to new protocol; obey Upgrade header')
PROCESSING = 102, 'Processing'
# success
OK = 200, 'OK', 'Request fulfilled, document follows'
CREATED = 201, 'Created', 'Document created, URL follows'
ACCEPTED = (202, 'Accepted',
'Request accepted, processing continues off-line')
NON_AUTHORITATIVE_INFORMATION = (203,
'Non-Authoritative Information', 'Request fulfilled from cache')
NO_CONTENT = 204, 'No Content', 'Request fulfilled, nothing follows'
RESET_CONTENT = 205, 'Reset Content', 'Clear input form for further input'
PARTIAL_CONTENT = 206, 'Partial Content', 'Partial content follows'
MULTI_STATUS = 207, 'Multi-Status'
ALREADY_REPORTED = 208, 'Already Reported'
IM_USED = 226, 'IM Used'
# redirection
MULTIPLE_CHOICES = (300, 'Multiple Choices',
'Object has several resources -- see URI list')
MOVED_PERMANENTLY = (301, 'Moved Permanently',
'Object moved permanently -- see URI list')
FOUND = 302, 'Found', 'Object moved temporarily -- see URI list'
SEE_OTHER = 303, 'See Other', 'Object moved -- see Method and URL list'
NOT_MODIFIED = (304, 'Not Modified',
'Document has not changed since given time')
USE_PROXY = (305, 'Use Proxy',
'You must use proxy specified in Location to access this resource')
TEMPORARY_REDIRECT = (307, 'Temporary Redirect',
'Object moved temporarily -- see URI list')
PERMANENT_REDIRECT = (308, 'Permanent Redirect',
'Object moved temporarily -- see URI list')
# client error
BAD_REQUEST = (400, 'Bad Request',
'Bad request syntax or unsupported method')
UNAUTHORIZED = (401, 'Unauthorized',
'No permission -- see authorization schemes')
PAYMENT_REQUIRED = (402, 'Payment Required',
'No payment -- see charging schemes')
FORBIDDEN = (403, 'Forbidden',
'Request forbidden -- authorization will not help')
NOT_FOUND = (404, 'Not Found',
'Nothing matches the given URI')
METHOD_NOT_ALLOWED = (405, 'Method Not Allowed',
'Specified method is invalid for this resource')
NOT_ACCEPTABLE = (406, 'Not Acceptable',
'URI not available in preferred format')
PROXY_AUTHENTICATION_REQUIRED = (407,
'Proxy Authentication Required',
'You must authenticate with this proxy before proceeding')
REQUEST_TIMEOUT = (408, 'Request Timeout',
'Request timed out; try again later')
CONFLICT = 409, 'Conflict', 'Request conflict'
GONE = (410, 'Gone',
'URI no longer exists and has been permanently removed')
LENGTH_REQUIRED = (411, 'Length Required',
'Client must specify Content-Length')
PRECONDITION_FAILED = (412, 'Precondition Failed',
'Precondition in headers is false')
REQUEST_ENTITY_TOO_LARGE = (413, 'Request Entity Too Large',
'Entity is too large')
REQUEST_URI_TOO_LONG = (414, 'Request-URI Too Long',
'URI is too long')
UNSUPPORTED_MEDIA_TYPE = (415, 'Unsupported Media Type',
'Entity body in unsupported format')
REQUESTED_RANGE_NOT_SATISFIABLE = (416,
'Requested Range Not Satisfiable',
'Cannot satisfy request range')
EXPECTATION_FAILED = (417, 'Expectation Failed',
'Expect condition could not be satisfied')
UNPROCESSABLE_ENTITY = 422, 'Unprocessable Entity'
LOCKED = 423, 'Locked'
FAILED_DEPENDENCY = 424, 'Failed Dependency'
UPGRADE_REQUIRED = 426, 'Upgrade Required'
PRECONDITION_REQUIRED = (428, 'Precondition Required',
'The origin server requires the request to be conditional')
TOO_MANY_REQUESTS = (429, 'Too Many Requests',
'The user has sent too many requests in '
'a given amount of time ("rate limiting")')
REQUEST_HEADER_FIELDS_TOO_LARGE = (431,
'Request Header Fields Too Large',
'The server is unwilling to process the request because its header '
'fields are too large')
# server errors
INTERNAL_SERVER_ERROR = (500, 'Internal Server Error',
'Server got itself in trouble')
NOT_IMPLEMENTED = (501, 'Not Implemented',
'Server does not support this operation')
BAD_GATEWAY = (502, 'Bad Gateway',
'Invalid responses from another server/proxy')
SERVICE_UNAVAILABLE = (503, 'Service Unavailable',
'The server cannot process the request due to a high load')
GATEWAY_TIMEOUT = (504, 'Gateway Timeout',
'The gateway server did not receive a timely response')
HTTP_VERSION_NOT_SUPPORTED = (505, 'HTTP Version Not Supported',
'Cannot fulfill request')
VARIANT_ALSO_NEGOTIATES = 506, 'Variant Also Negotiates'
INSUFFICIENT_STORAGE = 507, 'Insufficient Storage'
LOOP_DETECTED = 508, 'Loop Detected'
NOT_EXTENDED = 510, 'Not Extended'
NETWORK_AUTHENTICATION_REQUIRED = (511,
'Network Authentication Required',
'The client needs to authenticate to gain network access')

1478
Lib/http/client.py Normal file

File diff suppressed because it is too large Load Diff

2098
Lib/http/cookiejar.py Normal file

File diff suppressed because it is too large Load Diff

635
Lib/http/cookies.py Normal file
View File

@@ -0,0 +1,635 @@
####
# Copyright 2000 by Timothy O'Malley <timo@alum.mit.edu>
#
# All Rights Reserved
#
# Permission to use, copy, modify, and distribute this software
# and its documentation for any purpose and without fee is hereby
# granted, provided that the above copyright notice appear in all
# copies and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Timothy O'Malley not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
#
####
#
# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp
# by Timothy O'Malley <timo@alum.mit.edu>
#
# Cookie.py is a Python module for the handling of HTTP
# cookies as a Python dictionary. See RFC 2109 for more
# information on cookies.
#
# The original idea to treat Cookies as a dictionary came from
# Dave Mitchell (davem@magnet.com) in 1995, when he released the
# first version of nscookie.py.
#
####
r"""
Here's a sample session to show how to use this module.
At the moment, this is the only documentation.
The Basics
----------
Importing is easy...
>>> from http import cookies
Most of the time you start by creating a cookie.
>>> C = cookies.SimpleCookie()
Once you've created your Cookie, you can add values just as if it were
a dictionary.
>>> C = cookies.SimpleCookie()
>>> C["fig"] = "newton"
>>> C["sugar"] = "wafer"
>>> C.output()
'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer'
Notice that the printable representation of a Cookie is the
appropriate format for a Set-Cookie: header. This is the
default behavior. You can change the header and printed
attributes by using the .output() function
>>> C = cookies.SimpleCookie()
>>> C["rocky"] = "road"
>>> C["rocky"]["path"] = "/cookie"
>>> print(C.output(header="Cookie:"))
Cookie: rocky=road; Path=/cookie
>>> print(C.output(attrs=[], header="Cookie:"))
Cookie: rocky=road
The load() method of a Cookie extracts cookies from a string. In a
CGI script, you would use this method to extract the cookies from the
HTTP_COOKIE environment variable.
>>> C = cookies.SimpleCookie()
>>> C.load("chips=ahoy; vienna=finger")
>>> C.output()
'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger'
The load() method is darn-tootin smart about identifying cookies
within a string. Escaped quotation marks, nested semicolons, and other
such trickeries do not confuse it.
>>> C = cookies.SimpleCookie()
>>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";')
>>> print(C)
Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;"
Each element of the Cookie also supports all of the RFC 2109
Cookie attributes. Here's an example which sets the Path
attribute.
>>> C = cookies.SimpleCookie()
>>> C["oreo"] = "doublestuff"
>>> C["oreo"]["path"] = "/"
>>> print(C)
Set-Cookie: oreo=doublestuff; Path=/
Each dictionary element has a 'value' attribute, which gives you
back the value associated with the key.
>>> C = cookies.SimpleCookie()
>>> C["twix"] = "none for you"
>>> C["twix"].value
'none for you'
The SimpleCookie expects that all values should be standard strings.
Just to be sure, SimpleCookie invokes the str() builtin to convert
the value to a string, when the values are set dictionary-style.
>>> C = cookies.SimpleCookie()
>>> C["number"] = 7
>>> C["string"] = "seven"
>>> C["number"].value
'7'
>>> C["string"].value
'seven'
>>> C.output()
'Set-Cookie: number=7\r\nSet-Cookie: string=seven'
Finis.
"""
#
# Import our required modules
#
import re
import string
__all__ = ["CookieError", "BaseCookie", "SimpleCookie"]
_nulljoin = ''.join
_semispacejoin = '; '.join
_spacejoin = ' '.join
def _warn_deprecated_setter(setter):
import warnings
msg = ('The .%s setter is deprecated. The attribute will be read-only in '
'future releases. Please use the set() method instead.' % setter)
warnings.warn(msg, DeprecationWarning, stacklevel=3)
#
# Define an exception visible to External modules
#
class CookieError(Exception):
pass
# These quoting routines conform to the RFC2109 specification, which in
# turn references the character definitions from RFC2068. They provide
# a two-way quoting algorithm. Any non-text character is translated
# into a 4 character sequence: a forward-slash followed by the
# three-digit octal equivalent of the character. Any '\' or '"' is
# quoted with a preceding '\' slash.
# Because of the way browsers really handle cookies (as opposed to what
# the RFC says) we also encode "," and ";".
#
# These are taken from RFC2068 and RFC2109.
# _LegalChars is the list of chars which don't require "'s
# _Translator hash-table for fast quoting
#
_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:"
_UnescapedChars = _LegalChars + ' ()/<=>?@[]{}'
_Translator = {n: '\\%03o' % n
for n in set(range(256)) - set(map(ord, _UnescapedChars))}
_Translator.update({
ord('"'): '\\"',
ord('\\'): '\\\\',
})
_is_legal_key = re.compile('[%s]+' % re.escape(_LegalChars)).fullmatch
def _quote(str):
r"""Quote a string for use in a cookie header.
If the string does not need to be double-quoted, then just return the
string. Otherwise, surround the string in doublequotes and quote
(with a \) special characters.
"""
if str is None or _is_legal_key(str):
return str
else:
return '"' + str.translate(_Translator) + '"'
_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
_QuotePatt = re.compile(r"[\\].")
def _unquote(str):
# If there aren't any doublequotes,
# then there can't be any special characters. See RFC 2109.
if str is None or len(str) < 2:
return str
if str[0] != '"' or str[-1] != '"':
return str
# We have to assume that we must decode this string.
# Down to work.
# Remove the "s
str = str[1:-1]
# Check for special sequences. Examples:
# \012 --> \n
# \" --> "
#
i = 0
n = len(str)
res = []
while 0 <= i < n:
o_match = _OctalPatt.search(str, i)
q_match = _QuotePatt.search(str, i)
if not o_match and not q_match: # Neither matched
res.append(str[i:])
break
# else:
j = k = -1
if o_match:
j = o_match.start(0)
if q_match:
k = q_match.start(0)
if q_match and (not o_match or k < j): # QuotePatt matched
res.append(str[i:k])
res.append(str[k+1])
i = k + 2
else: # OctalPatt matched
res.append(str[i:j])
res.append(chr(int(str[j+1:j+4], 8)))
i = j + 4
return _nulljoin(res)
# The _getdate() routine is used to set the expiration time in the cookie's HTTP
# header. By default, _getdate() returns the current time in the appropriate
# "expires" format for a Set-Cookie header. The one optional argument is an
# offset from now, in seconds. For example, an offset of -3600 means "one hour
# ago". The offset may be a floating point number.
#
_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
_monthname = [None,
'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname):
from time import gmtime, time
now = time()
year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future)
return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % \
(weekdayname[wd], day, monthname[month], year, hh, mm, ss)
class Morsel(dict):
"""A class to hold ONE (key, value) pair.
In a cookie, each such pair may have several attributes, so this class is
used to keep the attributes associated with the appropriate key,value pair.
This class also includes a coded_value attribute, which is used to hold
the network representation of the value. This is most useful when Python
objects are pickled for network transit.
"""
# RFC 2109 lists these attributes as reserved:
# path comment domain
# max-age secure version
#
# For historical reasons, these attributes are also reserved:
# expires
#
# This is an extension from Microsoft:
# httponly
#
# This dictionary provides a mapping from the lowercase
# variant on the left to the appropriate traditional
# formatting on the right.
_reserved = {
"expires" : "expires",
"path" : "Path",
"comment" : "Comment",
"domain" : "Domain",
"max-age" : "Max-Age",
"secure" : "Secure",
"httponly" : "HttpOnly",
"version" : "Version",
}
_flags = {'secure', 'httponly'}
def __init__(self):
# Set defaults
self._key = self._value = self._coded_value = None
# Set default attributes
for key in self._reserved:
dict.__setitem__(self, key, "")
@property
def key(self):
return self._key
@key.setter
def key(self, key):
_warn_deprecated_setter('key')
self._key = key
@property
def value(self):
return self._value
@value.setter
def value(self, value):
_warn_deprecated_setter('value')
self._value = value
@property
def coded_value(self):
return self._coded_value
@coded_value.setter
def coded_value(self, coded_value):
_warn_deprecated_setter('coded_value')
self._coded_value = coded_value
def __setitem__(self, K, V):
K = K.lower()
if not K in self._reserved:
raise CookieError("Invalid attribute %r" % (K,))
dict.__setitem__(self, K, V)
def setdefault(self, key, val=None):
key = key.lower()
if key not in self._reserved:
raise CookieError("Invalid attribute %r" % (key,))
return dict.setdefault(self, key, val)
def __eq__(self, morsel):
if not isinstance(morsel, Morsel):
return NotImplemented
return (dict.__eq__(self, morsel) and
self._value == morsel._value and
self._key == morsel._key and
self._coded_value == morsel._coded_value)
__ne__ = object.__ne__
def copy(self):
morsel = Morsel()
dict.update(morsel, self)
morsel.__dict__.update(self.__dict__)
return morsel
def update(self, values):
data = {}
for key, val in dict(values).items():
key = key.lower()
if key not in self._reserved:
raise CookieError("Invalid attribute %r" % (key,))
data[key] = val
dict.update(self, data)
def isReservedKey(self, K):
return K.lower() in self._reserved
def set(self, key, val, coded_val, LegalChars=_LegalChars):
if LegalChars != _LegalChars:
import warnings
warnings.warn(
'LegalChars parameter is deprecated, ignored and will '
'be removed in future versions.', DeprecationWarning,
stacklevel=2)
if key.lower() in self._reserved:
raise CookieError('Attempt to set a reserved key %r' % (key,))
if not _is_legal_key(key):
raise CookieError('Illegal key %r' % (key,))
# It's a good key, so save it.
self._key = key
self._value = val
self._coded_value = coded_val
def __getstate__(self):
return {
'key': self._key,
'value': self._value,
'coded_value': self._coded_value,
}
def __setstate__(self, state):
self._key = state['key']
self._value = state['value']
self._coded_value = state['coded_value']
def output(self, attrs=None, header="Set-Cookie:"):
return "%s %s" % (header, self.OutputString(attrs))
__str__ = output
def __repr__(self):
return '<%s: %s>' % (self.__class__.__name__, self.OutputString())
def js_output(self, attrs=None):
# Print javascript
return """
<script type="text/javascript">
<!-- begin hiding
document.cookie = \"%s\";
// end hiding -->
</script>
""" % (self.OutputString(attrs).replace('"', r'\"'))
def OutputString(self, attrs=None):
# Build up our result
#
result = []
append = result.append
# First, the key=value pair
append("%s=%s" % (self.key, self.coded_value))
# Now add any defined attributes
if attrs is None:
attrs = self._reserved
items = sorted(self.items())
for key, value in items:
if value == "":
continue
if key not in attrs:
continue
if key == "expires" and isinstance(value, int):
append("%s=%s" % (self._reserved[key], _getdate(value)))
elif key == "max-age" and isinstance(value, int):
append("%s=%d" % (self._reserved[key], value))
elif key in self._flags:
if value:
append(str(self._reserved[key]))
else:
append("%s=%s" % (self._reserved[key], value))
# Return the result
return _semispacejoin(result)
#
# Pattern for finding cookie
#
# This used to be strict parsing based on the RFC2109 and RFC2068
# specifications. I have since discovered that MSIE 3.0x doesn't
# follow the character rules outlined in those specs. As a
# result, the parsing rules here are less strict.
#
_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
_LegalValueChars = _LegalKeyChars + r'\[\]'
_CookiePattern = re.compile(r"""
\s* # Optional whitespace at start of cookie
(?P<key> # Start of group 'key'
[""" + _LegalKeyChars + r"""]+? # Any word of at least one letter
) # End of group 'key'
( # Optional group: there may not be a value.
\s*=\s* # Equal Sign
(?P<val> # Start of group 'val'
"(?:[^\\"]|\\.)*" # Any doublequoted string
| # or
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
| # or
[""" + _LegalValueChars + r"""]* # Any word or empty string
) # End of group 'val'
)? # End of optional value group
\s* # Any number of spaces.
(\s+|;|$) # Ending either at space, semicolon, or EOS.
""", re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe.
# At long last, here is the cookie class. Using this class is almost just like
# using a dictionary. See this module's docstring for example usage.
#
class BaseCookie(dict):
"""A container class for a set of Morsels."""
def value_decode(self, val):
"""real_value, coded_value = value_decode(STRING)
Called prior to setting a cookie's value from the network
representation. The VALUE is the value read from HTTP
header.
Override this function to modify the behavior of cookies.
"""
return val, val
def value_encode(self, val):
"""real_value, coded_value = value_encode(VALUE)
Called prior to setting a cookie's value from the dictionary
representation. The VALUE is the value being assigned.
Override this function to modify the behavior of cookies.
"""
strval = str(val)
return strval, strval
def __init__(self, input=None):
if input:
self.load(input)
def __set(self, key, real_value, coded_value):
"""Private method for setting a cookie's value"""
M = self.get(key, Morsel())
M.set(key, real_value, coded_value)
dict.__setitem__(self, key, M)
def __setitem__(self, key, value):
"""Dictionary style assignment."""
if isinstance(value, Morsel):
# allow assignment of constructed Morsels (e.g. for pickling)
dict.__setitem__(self, key, value)
else:
rval, cval = self.value_encode(value)
self.__set(key, rval, cval)
def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"):
"""Return a string suitable for HTTP."""
result = []
items = sorted(self.items())
for key, value in items:
result.append(value.output(attrs, header))
return sep.join(result)
__str__ = output
def __repr__(self):
l = []
items = sorted(self.items())
for key, value in items:
l.append('%s=%s' % (key, repr(value.value)))
return '<%s: %s>' % (self.__class__.__name__, _spacejoin(l))
def js_output(self, attrs=None):
"""Return a string suitable for JavaScript."""
result = []
items = sorted(self.items())
for key, value in items:
result.append(value.js_output(attrs))
return _nulljoin(result)
def load(self, rawdata):
"""Load cookies from a string (presumably HTTP_COOKIE) or
from a dictionary. Loading cookies from a dictionary 'd'
is equivalent to calling:
map(Cookie.__setitem__, d.keys(), d.values())
"""
if isinstance(rawdata, str):
self.__parse_string(rawdata)
else:
# self.update() wouldn't call our custom __setitem__
for key, value in rawdata.items():
self[key] = value
return
def __parse_string(self, str, patt=_CookiePattern):
i = 0 # Our starting point
n = len(str) # Length of string
parsed_items = [] # Parsed (type, key, value) triples
morsel_seen = False # A key=value pair was previously encountered
TYPE_ATTRIBUTE = 1
TYPE_KEYVALUE = 2
# We first parse the whole cookie string and reject it if it's
# syntactically invalid (this helps avoid some classes of injection
# attacks).
while 0 <= i < n:
# Start looking for a cookie
match = patt.match(str, i)
if not match:
# No more cookies
break
key, value = match.group("key"), match.group("val")
i = match.end(0)
if key[0] == "$":
if not morsel_seen:
# We ignore attributes which pertain to the cookie
# mechanism as a whole, such as "$Version".
# See RFC 2965. (Does anyone care?)
continue
parsed_items.append((TYPE_ATTRIBUTE, key[1:], value))
elif key.lower() in Morsel._reserved:
if not morsel_seen:
# Invalid cookie string
return
if value is None:
if key.lower() in Morsel._flags:
parsed_items.append((TYPE_ATTRIBUTE, key, True))
else:
# Invalid cookie string
return
else:
parsed_items.append((TYPE_ATTRIBUTE, key, _unquote(value)))
elif value is not None:
parsed_items.append((TYPE_KEYVALUE, key, self.value_decode(value)))
morsel_seen = True
else:
# Invalid cookie string
return
# The cookie string is valid, apply it.
M = None # current morsel
for tp, key, value in parsed_items:
if tp == TYPE_ATTRIBUTE:
assert M is not None
M[key] = value
else:
assert tp == TYPE_KEYVALUE
rval, cval = value
self.__set(key, rval, cval)
M = self[key]
class SimpleCookie(BaseCookie):
"""
SimpleCookie supports strings as cookie values. When setting
the value using the dictionary assignment notation, SimpleCookie
calls the builtin str() to convert the value to a string. Values
received from HTTP are kept as strings.
"""
def value_decode(self, val):
return _unquote(val), val
def value_encode(self, val):
strval = str(val)
return strval, _quote(strval)

1211
Lib/http/server.py Normal file

File diff suppressed because it is too large Load Diff

597
Lib/mimetypes.py vendored Normal file
View File

@@ -0,0 +1,597 @@
"""Guess the MIME type of a file.
This module defines two useful functions:
guess_type(url, strict=True) -- guess the MIME type and encoding of a URL.
guess_extension(type, strict=True) -- guess the extension for a given MIME type.
It also contains the following, for tuning the behavior:
Data:
knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types
Functions:
init([files]) -- parse a list of files, default knownfiles (on Windows, the
default values are taken from the registry)
read_mime_types(file) -- parse one file, return a dictionary or None
"""
import os
import sys
import posixpath
import urllib.parse
try:
import winreg as _winreg
except ImportError:
_winreg = None
__all__ = [
"knownfiles", "inited", "MimeTypes",
"guess_type", "guess_all_extensions", "guess_extension",
"add_type", "init", "read_mime_types",
"suffix_map", "encodings_map", "types_map", "common_types"
]
knownfiles = [
"/etc/mime.types",
"/etc/httpd/mime.types", # Mac OS X
"/etc/httpd/conf/mime.types", # Apache
"/etc/apache/mime.types", # Apache 1
"/etc/apache2/mime.types", # Apache 2
"/usr/local/etc/httpd/conf/mime.types",
"/usr/local/lib/netscape/mime.types",
"/usr/local/etc/httpd/conf/mime.types", # Apache 1.2
"/usr/local/etc/mime.types", # Apache 1.3
]
inited = False
_db = None
class MimeTypes:
"""MIME-types datastore.
This datastore can handle information from mime.types-style files
and supports basic determination of MIME type from a filename or
URL, and can guess a reasonable extension given a MIME type.
"""
def __init__(self, filenames=(), strict=True):
if not inited:
init()
self.encodings_map = encodings_map.copy()
self.suffix_map = suffix_map.copy()
self.types_map = ({}, {}) # dict for (non-strict, strict)
self.types_map_inv = ({}, {})
for (ext, type) in types_map.items():
self.add_type(type, ext, True)
for (ext, type) in common_types.items():
self.add_type(type, ext, False)
for name in filenames:
self.read(name, strict)
def add_type(self, type, ext, strict=True):
"""Add a mapping between a type and an extension.
When the extension is already known, the new
type will replace the old one. When the type
is already known the extension will be added
to the list of known extensions.
If strict is true, information will be added to
list of standard types, else to the list of non-standard
types.
"""
self.types_map[strict][ext] = type
exts = self.types_map_inv[strict].setdefault(type, [])
if ext not in exts:
exts.append(ext)
def guess_type(self, url, strict=True):
"""Guess the type of a file based on its URL.
Return value is a tuple (type, encoding) where type is None if
the type can't be guessed (no or unknown suffix) or a string
of the form type/subtype, usable for a MIME Content-type
header; and encoding is None for no encoding or the name of
the program used to encode (e.g. compress or gzip). The
mappings are table driven. Encoding suffixes are case
sensitive; type suffixes are first tried case sensitive, then
case insensitive.
The suffixes .tgz, .taz and .tz (case sensitive!) are all
mapped to '.tar.gz'. (This is table-driven too, using the
dictionary suffix_map.)
Optional `strict' argument when False adds a bunch of commonly found,
but non-standard types.
"""
scheme, url = urllib.parse.splittype(url)
if scheme == 'data':
# syntax of data URLs:
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
# mediatype := [ type "/" subtype ] *( ";" parameter )
# data := *urlchar
# parameter := attribute "=" value
# type/subtype defaults to "text/plain"
comma = url.find(',')
if comma < 0:
# bad data URL
return None, None
semi = url.find(';', 0, comma)
if semi >= 0:
type = url[:semi]
else:
type = url[:comma]
if '=' in type or '/' not in type:
type = 'text/plain'
return type, None # never compressed, so encoding is None
base, ext = posixpath.splitext(url)
while ext in self.suffix_map:
base, ext = posixpath.splitext(base + self.suffix_map[ext])
if ext in self.encodings_map:
encoding = self.encodings_map[ext]
base, ext = posixpath.splitext(base)
else:
encoding = None
types_map = self.types_map[True]
if ext in types_map:
return types_map[ext], encoding
elif ext.lower() in types_map:
return types_map[ext.lower()], encoding
elif strict:
return None, encoding
types_map = self.types_map[False]
if ext in types_map:
return types_map[ext], encoding
elif ext.lower() in types_map:
return types_map[ext.lower()], encoding
else:
return None, encoding
def guess_all_extensions(self, type, strict=True):
"""Guess the extensions for a file based on its MIME type.
Return value is a list of strings giving the possible filename
extensions, including the leading dot ('.'). The extension is not
guaranteed to have been associated with any particular data stream,
but would be mapped to the MIME type `type' by guess_type().
Optional `strict' argument when false adds a bunch of commonly found,
but non-standard types.
"""
type = type.lower()
extensions = self.types_map_inv[True].get(type, [])
if not strict:
for ext in self.types_map_inv[False].get(type, []):
if ext not in extensions:
extensions.append(ext)
return extensions
def guess_extension(self, type, strict=True):
"""Guess the extension for a file based on its MIME type.
Return value is a string giving a filename extension,
including the leading dot ('.'). The extension is not
guaranteed to have been associated with any particular data
stream, but would be mapped to the MIME type `type' by
guess_type(). If no extension can be guessed for `type', None
is returned.
Optional `strict' argument when false adds a bunch of commonly found,
but non-standard types.
"""
extensions = self.guess_all_extensions(type, strict)
if not extensions:
return None
return extensions[0]
def read(self, filename, strict=True):
"""
Read a single mime.types-format file, specified by pathname.
If strict is true, information will be added to
list of standard types, else to the list of non-standard
types.
"""
with open(filename, encoding='utf-8') as fp:
self.readfp(fp, strict)
def readfp(self, fp, strict=True):
"""
Read a single mime.types-format file.
If strict is true, information will be added to
list of standard types, else to the list of non-standard
types.
"""
while 1:
line = fp.readline()
if not line:
break
words = line.split()
for i in range(len(words)):
if words[i][0] == '#':
del words[i:]
break
if not words:
continue
type, suffixes = words[0], words[1:]
for suff in suffixes:
self.add_type(type, '.' + suff, strict)
def read_windows_registry(self, strict=True):
"""
Load the MIME types database from Windows registry.
If strict is true, information will be added to
list of standard types, else to the list of non-standard
types.
"""
# Windows only
if not _winreg:
return
def enum_types(mimedb):
i = 0
while True:
try:
ctype = _winreg.EnumKey(mimedb, i)
except EnvironmentError:
break
else:
if '\0' not in ctype:
yield ctype
i += 1
with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr:
for subkeyname in enum_types(hkcr):
try:
with _winreg.OpenKey(hkcr, subkeyname) as subkey:
# Only check file extensions
if not subkeyname.startswith("."):
continue
# raises EnvironmentError if no 'Content Type' value
mimetype, datatype = _winreg.QueryValueEx(
subkey, 'Content Type')
if datatype != _winreg.REG_SZ:
continue
self.add_type(mimetype, subkeyname, strict)
except EnvironmentError:
continue
def guess_type(url, strict=True):
"""Guess the type of a file based on its URL.
Return value is a tuple (type, encoding) where type is None if the
type can't be guessed (no or unknown suffix) or a string of the
form type/subtype, usable for a MIME Content-type header; and
encoding is None for no encoding or the name of the program used
to encode (e.g. compress or gzip). The mappings are table
driven. Encoding suffixes are case sensitive; type suffixes are
first tried case sensitive, then case insensitive.
The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
to ".tar.gz". (This is table-driven too, using the dictionary
suffix_map).
Optional `strict' argument when false adds a bunch of commonly found, but
non-standard types.
"""
if _db is None:
init()
return _db.guess_type(url, strict)
def guess_all_extensions(type, strict=True):
"""Guess the extensions for a file based on its MIME type.
Return value is a list of strings giving the possible filename
extensions, including the leading dot ('.'). The extension is not
guaranteed to have been associated with any particular data
stream, but would be mapped to the MIME type `type' by
guess_type(). If no extension can be guessed for `type', None
is returned.
Optional `strict' argument when false adds a bunch of commonly found,
but non-standard types.
"""
if _db is None:
init()
return _db.guess_all_extensions(type, strict)
def guess_extension(type, strict=True):
"""Guess the extension for a file based on its MIME type.
Return value is a string giving a filename extension, including the
leading dot ('.'). The extension is not guaranteed to have been
associated with any particular data stream, but would be mapped to the
MIME type `type' by guess_type(). If no extension can be guessed for
`type', None is returned.
Optional `strict' argument when false adds a bunch of commonly found,
but non-standard types.
"""
if _db is None:
init()
return _db.guess_extension(type, strict)
def add_type(type, ext, strict=True):
"""Add a mapping between a type and an extension.
When the extension is already known, the new
type will replace the old one. When the type
is already known the extension will be added
to the list of known extensions.
If strict is true, information will be added to
list of standard types, else to the list of non-standard
types.
"""
if _db is None:
init()
return _db.add_type(type, ext, strict)
def init(files=None):
global suffix_map, types_map, encodings_map, common_types
global inited, _db
inited = True # so that MimeTypes.__init__() doesn't call us again
db = MimeTypes()
if files is None:
if _winreg:
db.read_windows_registry()
files = knownfiles
for file in files:
if os.path.isfile(file):
db.read(file)
encodings_map = db.encodings_map
suffix_map = db.suffix_map
types_map = db.types_map[True]
common_types = db.types_map[False]
# Make the DB a global variable now that it is fully initialized
_db = db
def read_mime_types(file):
try:
f = open(file)
except OSError:
return None
with f:
db = MimeTypes()
db.readfp(f, True)
return db.types_map[True]
def _default_mime_types():
global suffix_map
global encodings_map
global types_map
global common_types
suffix_map = {
'.svgz': '.svg.gz',
'.tgz': '.tar.gz',
'.taz': '.tar.gz',
'.tz': '.tar.gz',
'.tbz2': '.tar.bz2',
'.txz': '.tar.xz',
}
encodings_map = {
'.gz': 'gzip',
'.Z': 'compress',
'.bz2': 'bzip2',
'.xz': 'xz',
}
# Before adding new types, make sure they are either registered with IANA,
# at http://www.iana.org/assignments/media-types
# or extensions, i.e. using the x- prefix
# If you add to these, please keep them sorted!
types_map = {
'.a' : 'application/octet-stream',
'.ai' : 'application/postscript',
'.aif' : 'audio/x-aiff',
'.aifc' : 'audio/x-aiff',
'.aiff' : 'audio/x-aiff',
'.au' : 'audio/basic',
'.avi' : 'video/x-msvideo',
'.bat' : 'text/plain',
'.bcpio' : 'application/x-bcpio',
'.bin' : 'application/octet-stream',
'.bmp' : 'image/x-ms-bmp',
'.c' : 'text/plain',
# Duplicates :(
'.cdf' : 'application/x-cdf',
'.cdf' : 'application/x-netcdf',
'.cpio' : 'application/x-cpio',
'.csh' : 'application/x-csh',
'.css' : 'text/css',
'.csv' : 'text/csv',
'.dll' : 'application/octet-stream',
'.doc' : 'application/msword',
'.dot' : 'application/msword',
'.dvi' : 'application/x-dvi',
'.eml' : 'message/rfc822',
'.eps' : 'application/postscript',
'.etx' : 'text/x-setext',
'.exe' : 'application/octet-stream',
'.gif' : 'image/gif',
'.gtar' : 'application/x-gtar',
'.h' : 'text/plain',
'.hdf' : 'application/x-hdf',
'.htm' : 'text/html',
'.html' : 'text/html',
'.ico' : 'image/vnd.microsoft.icon',
'.ief' : 'image/ief',
'.jpe' : 'image/jpeg',
'.jpeg' : 'image/jpeg',
'.jpg' : 'image/jpeg',
'.js' : 'application/javascript',
'.ksh' : 'text/plain',
'.latex' : 'application/x-latex',
'.m1v' : 'video/mpeg',
'.m3u' : 'application/vnd.apple.mpegurl',
'.m3u8' : 'application/vnd.apple.mpegurl',
'.man' : 'application/x-troff-man',
'.me' : 'application/x-troff-me',
'.mht' : 'message/rfc822',
'.mhtml' : 'message/rfc822',
'.mif' : 'application/x-mif',
'.mov' : 'video/quicktime',
'.movie' : 'video/x-sgi-movie',
'.mp2' : 'audio/mpeg',
'.mp3' : 'audio/mpeg',
'.mp4' : 'video/mp4',
'.mpa' : 'video/mpeg',
'.mpe' : 'video/mpeg',
'.mpeg' : 'video/mpeg',
'.mpg' : 'video/mpeg',
'.ms' : 'application/x-troff-ms',
'.nc' : 'application/x-netcdf',
'.nws' : 'message/rfc822',
'.o' : 'application/octet-stream',
'.obj' : 'application/octet-stream',
'.oda' : 'application/oda',
'.p12' : 'application/x-pkcs12',
'.p7c' : 'application/pkcs7-mime',
'.pbm' : 'image/x-portable-bitmap',
'.pdf' : 'application/pdf',
'.pfx' : 'application/x-pkcs12',
'.pgm' : 'image/x-portable-graymap',
'.pl' : 'text/plain',
'.png' : 'image/png',
'.pnm' : 'image/x-portable-anymap',
'.pot' : 'application/vnd.ms-powerpoint',
'.ppa' : 'application/vnd.ms-powerpoint',
'.ppm' : 'image/x-portable-pixmap',
'.pps' : 'application/vnd.ms-powerpoint',
'.ppt' : 'application/vnd.ms-powerpoint',
'.ps' : 'application/postscript',
'.pwz' : 'application/vnd.ms-powerpoint',
'.py' : 'text/x-python',
'.pyc' : 'application/x-python-code',
'.pyo' : 'application/x-python-code',
'.qt' : 'video/quicktime',
'.ra' : 'audio/x-pn-realaudio',
'.ram' : 'application/x-pn-realaudio',
'.ras' : 'image/x-cmu-raster',
'.rdf' : 'application/xml',
'.rgb' : 'image/x-rgb',
'.roff' : 'application/x-troff',
'.rtx' : 'text/richtext',
'.sgm' : 'text/x-sgml',
'.sgml' : 'text/x-sgml',
'.sh' : 'application/x-sh',
'.shar' : 'application/x-shar',
'.snd' : 'audio/basic',
'.so' : 'application/octet-stream',
'.src' : 'application/x-wais-source',
'.sv4cpio': 'application/x-sv4cpio',
'.sv4crc' : 'application/x-sv4crc',
'.svg' : 'image/svg+xml',
'.swf' : 'application/x-shockwave-flash',
'.t' : 'application/x-troff',
'.tar' : 'application/x-tar',
'.tcl' : 'application/x-tcl',
'.tex' : 'application/x-tex',
'.texi' : 'application/x-texinfo',
'.texinfo': 'application/x-texinfo',
'.tif' : 'image/tiff',
'.tiff' : 'image/tiff',
'.tr' : 'application/x-troff',
'.tsv' : 'text/tab-separated-values',
'.txt' : 'text/plain',
'.ustar' : 'application/x-ustar',
'.vcf' : 'text/x-vcard',
'.wav' : 'audio/x-wav',
'.webm' : 'video/webm',
'.wiz' : 'application/msword',
'.wsdl' : 'application/xml',
'.xbm' : 'image/x-xbitmap',
'.xlb' : 'application/vnd.ms-excel',
# Duplicates :(
'.xls' : 'application/excel',
'.xls' : 'application/vnd.ms-excel',
'.xml' : 'text/xml',
'.xpdl' : 'application/xml',
'.xpm' : 'image/x-xpixmap',
'.xsl' : 'application/xml',
'.xwd' : 'image/x-xwindowdump',
'.zip' : 'application/zip',
}
# These are non-standard types, commonly found in the wild. They will
# only match if strict=0 flag is given to the API methods.
# Please sort these too
common_types = {
'.jpg' : 'image/jpg',
'.mid' : 'audio/midi',
'.midi': 'audio/midi',
'.pct' : 'image/pict',
'.pic' : 'image/pict',
'.pict': 'image/pict',
'.rtf' : 'application/rtf',
'.xul' : 'text/xul'
}
_default_mime_types()
if __name__ == '__main__':
import getopt
USAGE = """\
Usage: mimetypes.py [options] type
Options:
--help / -h -- print this message and exit
--lenient / -l -- additionally search of some common, but non-standard
types.
--extension / -e -- guess extension instead of type
More than one type argument may be given.
"""
def usage(code, msg=''):
print(USAGE)
if msg: print(msg)
sys.exit(code)
try:
opts, args = getopt.getopt(sys.argv[1:], 'hle',
['help', 'lenient', 'extension'])
except getopt.error as msg:
usage(1, msg)
strict = 1
extension = 0
for opt, arg in opts:
if opt in ('-h', '--help'):
usage(0)
elif opt in ('-l', '--lenient'):
strict = 0
elif opt in ('-e', '--extension'):
extension = 1
for gtype in args:
if extension:
guess = guess_extension(gtype, strict)
if not guess: print("I don't know anything about type", gtype)
else: print(guess)
else:
guess, encoding = guess_type(gtype, strict)
if not guess: print("I don't know anything about type", gtype)
else: print('type:', guess, 'encoding:', encoding)

795
Lib/socketserver.py vendored Normal file
View File

@@ -0,0 +1,795 @@
"""Generic socket server classes.
This module tries to capture the various aspects of defining a server:
For socket-based servers:
- address family:
- AF_INET{,6}: IP (Internet Protocol) sockets (default)
- AF_UNIX: Unix domain sockets
- others, e.g. AF_DECNET are conceivable (see <socket.h>
- socket type:
- SOCK_STREAM (reliable stream, e.g. TCP)
- SOCK_DGRAM (datagrams, e.g. UDP)
For request-based servers (including socket-based):
- client address verification before further looking at the request
(This is actually a hook for any processing that needs to look
at the request before anything else, e.g. logging)
- how to handle multiple requests:
- synchronous (one request is handled at a time)
- forking (each request is handled by a new process)
- threading (each request is handled by a new thread)
The classes in this module favor the server type that is simplest to
write: a synchronous TCP/IP server. This is bad class design, but
save some typing. (There's also the issue that a deep class hierarchy
slows down method lookups.)
There are five classes in an inheritance diagram, four of which represent
synchronous servers of four types:
+------------+
| BaseServer |
+------------+
|
v
+-----------+ +------------------+
| TCPServer |------->| UnixStreamServer |
+-----------+ +------------------+
|
v
+-----------+ +--------------------+
| UDPServer |------->| UnixDatagramServer |
+-----------+ +--------------------+
Note that UnixDatagramServer derives from UDPServer, not from
UnixStreamServer -- the only difference between an IP and a Unix
stream server is the address family, which is simply repeated in both
unix server classes.
Forking and threading versions of each type of server can be created
using the ForkingMixIn and ThreadingMixIn mix-in classes. For
instance, a threading UDP server class is created as follows:
class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass
The Mix-in class must come first, since it overrides a method defined
in UDPServer! Setting the various member variables also changes
the behavior of the underlying server mechanism.
To implement a service, you must derive a class from
BaseRequestHandler and redefine its handle() method. You can then run
various versions of the service by combining one of the server classes
with your request handler class.
The request handler class must be different for datagram or stream
services. This can be hidden by using the request handler
subclasses StreamRequestHandler or DatagramRequestHandler.
Of course, you still have to use your head!
For instance, it makes no sense to use a forking server if the service
contains state in memory that can be modified by requests (since the
modifications in the child process would never reach the initial state
kept in the parent process and passed to each child). In this case,
you can use a threading server, but you will probably have to use
locks to avoid two requests that come in nearly simultaneous to apply
conflicting changes to the server state.
On the other hand, if you are building e.g. an HTTP server, where all
data is stored externally (e.g. in the file system), a synchronous
class will essentially render the service "deaf" while one request is
being handled -- which may be for a very long time if a client is slow
to read all the data it has requested. Here a threading or forking
server is appropriate.
In some cases, it may be appropriate to process part of a request
synchronously, but to finish processing in a forked child depending on
the request data. This can be implemented by using a synchronous
server and doing an explicit fork in the request handler class
handle() method.
Another approach to handling multiple simultaneous requests in an
environment that supports neither threads nor fork (or where these are
too expensive or inappropriate for the service) is to maintain an
explicit table of partially finished requests and to use a selector to
decide which request to work on next (or whether to handle a new
incoming request). This is particularly important for stream services
where each client can potentially be connected for a long time (if
threads or subprocesses cannot be used).
Future work:
- Standard classes for Sun RPC (which uses either UDP or TCP)
- Standard mix-in classes to implement various authentication
and encryption schemes
XXX Open problems:
- What to do with out-of-band data?
BaseServer:
- split generic "request" functionality out into BaseServer class.
Copyright (C) 2000 Luke Kenneth Casson Leighton <lkcl@samba.org>
example: read entries from a SQL database (requires overriding
get_request() to return a table entry from the database).
entry is processed by a RequestHandlerClass.
"""
# Author of the BaseServer patch: Luke Kenneth Casson Leighton
__version__ = "0.4"
import socket
import selectors
import os
import errno
import sys
try:
import threading
except ImportError:
import dummy_threading as threading
from io import BufferedIOBase
from time import monotonic as time
__all__ = ["BaseServer", "TCPServer", "UDPServer",
"ThreadingUDPServer", "ThreadingTCPServer",
"BaseRequestHandler", "StreamRequestHandler",
"DatagramRequestHandler", "ThreadingMixIn"]
if hasattr(os, "fork"):
__all__.extend(["ForkingUDPServer","ForkingTCPServer", "ForkingMixIn"])
if hasattr(socket, "AF_UNIX"):
__all__.extend(["UnixStreamServer","UnixDatagramServer",
"ThreadingUnixStreamServer",
"ThreadingUnixDatagramServer"])
# poll/select have the advantage of not requiring any extra file descriptor,
# contrarily to epoll/kqueue (also, they require a single syscall).
if hasattr(selectors, 'PollSelector'):
_ServerSelector = selectors.PollSelector
else:
_ServerSelector = selectors.SelectSelector
class BaseServer:
"""Base class for server classes.
Methods for the caller:
- __init__(server_address, RequestHandlerClass)
- serve_forever(poll_interval=0.5)
- shutdown()
- handle_request() # if you do not use serve_forever()
- fileno() -> int # for selector
Methods that may be overridden:
- server_bind()
- server_activate()
- get_request() -> request, client_address
- handle_timeout()
- verify_request(request, client_address)
- server_close()
- process_request(request, client_address)
- shutdown_request(request)
- close_request(request)
- service_actions()
- handle_error()
Methods for derived classes:
- finish_request(request, client_address)
Class variables that may be overridden by derived classes or
instances:
- timeout
- address_family
- socket_type
- allow_reuse_address
Instance variables:
- RequestHandlerClass
- socket
"""
timeout = None
def __init__(self, server_address, RequestHandlerClass):
"""Constructor. May be extended, do not override."""
self.server_address = server_address
self.RequestHandlerClass = RequestHandlerClass
self.__is_shut_down = threading.Event()
self.__shutdown_request = False
def server_activate(self):
"""Called by constructor to activate the server.
May be overridden.
"""
pass
def serve_forever(self, poll_interval=0.5):
"""Handle one request at a time until shutdown.
Polls for shutdown every poll_interval seconds. Ignores
self.timeout. If you need to do periodic tasks, do them in
another thread.
"""
self.__is_shut_down.clear()
try:
# XXX: Consider using another file descriptor or connecting to the
# socket to wake this up instead of polling. Polling reduces our
# responsiveness to a shutdown request and wastes cpu at all other
# times.
with _ServerSelector() as selector:
selector.register(self, selectors.EVENT_READ)
while not self.__shutdown_request:
ready = selector.select(poll_interval)
if ready:
self._handle_request_noblock()
self.service_actions()
finally:
self.__shutdown_request = False
self.__is_shut_down.set()
def shutdown(self):
"""Stops the serve_forever loop.
Blocks until the loop has finished. This must be called while
serve_forever() is running in another thread, or it will
deadlock.
"""
self.__shutdown_request = True
self.__is_shut_down.wait()
def service_actions(self):
"""Called by the serve_forever() loop.
May be overridden by a subclass / Mixin to implement any code that
needs to be run during the loop.
"""
pass
# The distinction between handling, getting, processing and finishing a
# request is fairly arbitrary. Remember:
#
# - handle_request() is the top-level call. It calls selector.select(),
# get_request(), verify_request() and process_request()
# - get_request() is different for stream or datagram sockets
# - process_request() is the place that may fork a new process or create a
# new thread to finish the request
# - finish_request() instantiates the request handler class; this
# constructor will handle the request all by itself
def handle_request(self):
"""Handle one request, possibly blocking.
Respects self.timeout.
"""
# Support people who used socket.settimeout() to escape
# handle_request before self.timeout was available.
timeout = self.socket.gettimeout()
if timeout is None:
timeout = self.timeout
elif self.timeout is not None:
timeout = min(timeout, self.timeout)
if timeout is not None:
deadline = time() + timeout
# Wait until a request arrives or the timeout expires - the loop is
# necessary to accommodate early wakeups due to EINTR.
with _ServerSelector() as selector:
selector.register(self, selectors.EVENT_READ)
while True:
ready = selector.select(timeout)
if ready:
return self._handle_request_noblock()
else:
if timeout is not None:
timeout = deadline - time()
if timeout < 0:
return self.handle_timeout()
def _handle_request_noblock(self):
"""Handle one request, without blocking.
I assume that selector.select() has returned that the socket is
readable before this function was called, so there should be no risk of
blocking in get_request().
"""
try:
request, client_address = self.get_request()
except OSError:
return
if self.verify_request(request, client_address):
try:
self.process_request(request, client_address)
except Exception:
self.handle_error(request, client_address)
self.shutdown_request(request)
except:
self.shutdown_request(request)
raise
else:
self.shutdown_request(request)
def handle_timeout(self):
"""Called if no new request arrives within self.timeout.
Overridden by ForkingMixIn.
"""
pass
def verify_request(self, request, client_address):
"""Verify the request. May be overridden.
Return True if we should proceed with this request.
"""
return True
def process_request(self, request, client_address):
"""Call finish_request.
Overridden by ForkingMixIn and ThreadingMixIn.
"""
self.finish_request(request, client_address)
self.shutdown_request(request)
def server_close(self):
"""Called to clean-up the server.
May be overridden.
"""
pass
def finish_request(self, request, client_address):
"""Finish one request by instantiating RequestHandlerClass."""
self.RequestHandlerClass(request, client_address, self)
def shutdown_request(self, request):
"""Called to shutdown and close an individual request."""
self.close_request(request)
def close_request(self, request):
"""Called to clean up an individual request."""
pass
def handle_error(self, request, client_address):
"""Handle an error gracefully. May be overridden.
The default is to print a traceback and continue.
"""
print('-'*40, file=sys.stderr)
print('Exception happened during processing of request from',
client_address, file=sys.stderr)
import traceback
traceback.print_exc()
print('-'*40, file=sys.stderr)
def __enter__(self):
return self
def __exit__(self, *args):
self.server_close()
class TCPServer(BaseServer):
"""Base class for various socket-based server classes.
Defaults to synchronous IP stream (i.e., TCP).
Methods for the caller:
- __init__(server_address, RequestHandlerClass, bind_and_activate=True)
- serve_forever(poll_interval=0.5)
- shutdown()
- handle_request() # if you don't use serve_forever()
- fileno() -> int # for selector
Methods that may be overridden:
- server_bind()
- server_activate()
- get_request() -> request, client_address
- handle_timeout()
- verify_request(request, client_address)
- process_request(request, client_address)
- shutdown_request(request)
- close_request(request)
- handle_error()
Methods for derived classes:
- finish_request(request, client_address)
Class variables that may be overridden by derived classes or
instances:
- timeout
- address_family
- socket_type
- request_queue_size (only for stream sockets)
- allow_reuse_address
Instance variables:
- server_address
- RequestHandlerClass
- socket
"""
address_family = socket.AF_INET
socket_type = socket.SOCK_STREAM
request_queue_size = 5
allow_reuse_address = False
def __init__(self, server_address, RequestHandlerClass, bind_and_activate=True):
"""Constructor. May be extended, do not override."""
BaseServer.__init__(self, server_address, RequestHandlerClass)
self.socket = socket.socket(self.address_family,
self.socket_type)
if bind_and_activate:
try:
self.server_bind()
self.server_activate()
except:
self.server_close()
raise
def server_bind(self):
"""Called by constructor to bind the socket.
May be overridden.
"""
if self.allow_reuse_address:
self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
self.socket.bind(self.server_address)
self.server_address = self.socket.getsockname()
def server_activate(self):
"""Called by constructor to activate the server.
May be overridden.
"""
self.socket.listen(self.request_queue_size)
def server_close(self):
"""Called to clean-up the server.
May be overridden.
"""
self.socket.close()
def fileno(self):
"""Return socket file number.
Interface required by selector.
"""
return self.socket.fileno()
def get_request(self):
"""Get the request and client address from the socket.
May be overridden.
"""
return self.socket.accept()
def shutdown_request(self, request):
"""Called to shutdown and close an individual request."""
try:
#explicitly shutdown. socket.close() merely releases
#the socket and waits for GC to perform the actual close.
request.shutdown(socket.SHUT_WR)
except OSError:
pass #some platforms may raise ENOTCONN here
self.close_request(request)
def close_request(self, request):
"""Called to clean up an individual request."""
request.close()
class UDPServer(TCPServer):
"""UDP server class."""
allow_reuse_address = False
socket_type = socket.SOCK_DGRAM
max_packet_size = 8192
def get_request(self):
data, client_addr = self.socket.recvfrom(self.max_packet_size)
return (data, self.socket), client_addr
def server_activate(self):
# No need to call listen() for UDP.
pass
def shutdown_request(self, request):
# No need to shutdown anything.
self.close_request(request)
def close_request(self, request):
# No need to close anything.
pass
if hasattr(os, "fork"):
class ForkingMixIn:
"""Mix-in class to handle each request in a new process."""
timeout = 300
active_children = None
max_children = 40
def collect_children(self):
"""Internal routine to wait for children that have exited."""
if self.active_children is None:
return
# If we're above the max number of children, wait and reap them until
# we go back below threshold. Note that we use waitpid(-1) below to be
# able to collect children in size(<defunct children>) syscalls instead
# of size(<children>): the downside is that this might reap children
# which we didn't spawn, which is why we only resort to this when we're
# above max_children.
while len(self.active_children) >= self.max_children:
try:
pid, _ = os.waitpid(-1, 0)
self.active_children.discard(pid)
except ChildProcessError:
# we don't have any children, we're done
self.active_children.clear()
except OSError:
break
# Now reap all defunct children.
for pid in self.active_children.copy():
try:
pid, _ = os.waitpid(pid, os.WNOHANG)
# if the child hasn't exited yet, pid will be 0 and ignored by
# discard() below
self.active_children.discard(pid)
except ChildProcessError:
# someone else reaped it
self.active_children.discard(pid)
except OSError:
pass
def handle_timeout(self):
"""Wait for zombies after self.timeout seconds of inactivity.
May be extended, do not override.
"""
self.collect_children()
def service_actions(self):
"""Collect the zombie child processes regularly in the ForkingMixIn.
service_actions is called in the BaseServer's serve_forver loop.
"""
self.collect_children()
def process_request(self, request, client_address):
"""Fork a new subprocess to process the request."""
pid = os.fork()
if pid:
# Parent process
if self.active_children is None:
self.active_children = set()
self.active_children.add(pid)
self.close_request(request)
return
else:
# Child process.
# This must never return, hence os._exit()!
status = 1
try:
self.finish_request(request, client_address)
status = 0
except Exception:
self.handle_error(request, client_address)
finally:
try:
self.shutdown_request(request)
finally:
os._exit(status)
class ThreadingMixIn:
"""Mix-in class to handle each request in a new thread."""
# Decides how threads will act upon termination of the
# main process
daemon_threads = False
def process_request_thread(self, request, client_address):
"""Same as in BaseServer but as a thread.
In addition, exception handling is done here.
"""
try:
self.finish_request(request, client_address)
except Exception:
self.handle_error(request, client_address)
finally:
self.shutdown_request(request)
def process_request(self, request, client_address):
"""Start a new thread to process the request."""
t = threading.Thread(target = self.process_request_thread,
args = (request, client_address))
t.daemon = self.daemon_threads
t.start()
if hasattr(os, "fork"):
class ForkingUDPServer(ForkingMixIn, UDPServer): pass
class ForkingTCPServer(ForkingMixIn, TCPServer): pass
class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass
class ThreadingTCPServer(ThreadingMixIn, TCPServer): pass
if hasattr(socket, 'AF_UNIX'):
class UnixStreamServer(TCPServer):
address_family = socket.AF_UNIX
class UnixDatagramServer(UDPServer):
address_family = socket.AF_UNIX
class ThreadingUnixStreamServer(ThreadingMixIn, UnixStreamServer): pass
class ThreadingUnixDatagramServer(ThreadingMixIn, UnixDatagramServer): pass
class BaseRequestHandler:
"""Base class for request handler classes.
This class is instantiated for each request to be handled. The
constructor sets the instance variables request, client_address
and server, and then calls the handle() method. To implement a
specific service, all you need to do is to derive a class which
defines a handle() method.
The handle() method can find the request as self.request, the
client address as self.client_address, and the server (in case it
needs access to per-server information) as self.server. Since a
separate instance is created for each request, the handle() method
can define other arbitrary instance variables.
"""
def __init__(self, request, client_address, server):
self.request = request
self.client_address = client_address
self.server = server
self.setup()
try:
self.handle()
finally:
self.finish()
def setup(self):
pass
def handle(self):
pass
def finish(self):
pass
# The following two classes make it possible to use the same service
# class for stream or datagram servers.
# Each class sets up these instance variables:
# - rfile: a file object from which receives the request is read
# - wfile: a file object to which the reply is written
# When the handle() method returns, wfile is flushed properly
class StreamRequestHandler(BaseRequestHandler):
"""Define self.rfile and self.wfile for stream sockets."""
# Default buffer sizes for rfile, wfile.
# We default rfile to buffered because otherwise it could be
# really slow for large data (a getc() call per byte); we make
# wfile unbuffered because (a) often after a write() we want to
# read and we need to flush the line; (b) big writes to unbuffered
# files are typically optimized by stdio even when big reads
# aren't.
rbufsize = -1
wbufsize = 0
# A timeout to apply to the request socket, if not None.
timeout = None
# Disable nagle algorithm for this socket, if True.
# Use only when wbufsize != 0, to avoid small packets.
disable_nagle_algorithm = False
def setup(self):
self.connection = self.request
if self.timeout is not None:
self.connection.settimeout(self.timeout)
if self.disable_nagle_algorithm:
self.connection.setsockopt(socket.IPPROTO_TCP,
socket.TCP_NODELAY, True)
self.rfile = self.connection.makefile('rb', self.rbufsize)
if self.wbufsize == 0:
self.wfile = _SocketWriter(self.connection)
else:
self.wfile = self.connection.makefile('wb', self.wbufsize)
def finish(self):
if not self.wfile.closed:
try:
self.wfile.flush()
except socket.error:
# A final socket error may have occurred here, such as
# the local error ECONNABORTED.
pass
self.wfile.close()
self.rfile.close()
class _SocketWriter(BufferedIOBase):
"""Simple writable BufferedIOBase implementation for a socket
Does not hold data in a buffer, avoiding any need to call flush()."""
def __init__(self, sock):
self._sock = sock
def writable(self):
return True
def write(self, b):
self._sock.sendall(b)
# XXX RustPython TODO: implement memoryview properly
#with memoryview(b) as view:
# return view.nbytes
return len(b)
def fileno(self):
return self._sock.fileno()
class DatagramRequestHandler(BaseRequestHandler):
"""Define self.rfile and self.wfile for datagram sockets."""
def setup(self):
from io import BytesIO
self.packet, self.socket = self.request
self.rfile = BytesIO(self.packet)
self.wfile = BytesIO()
def finish(self):
self.socket.sendto(self.wfile.getvalue(), self.client_address)

272
Lib/stringprep.py vendored Normal file
View File

@@ -0,0 +1,272 @@
# This file is generated by mkstringprep.py. DO NOT EDIT.
"""Library that exposes various tables found in the StringPrep RFC 3454.
There are two kinds of tables: sets, for which a member test is provided,
and mappings, for which a mapping function is provided.
"""
from unicodedata import ucd_3_2_0 as unicodedata
assert unicodedata.unidata_version == '3.2.0'
def in_table_a1(code):
if unicodedata.category(code) != 'Cn': return False
c = ord(code)
if 0xFDD0 <= c < 0xFDF0: return False
return (c & 0xFFFF) not in (0xFFFE, 0xFFFF)
b1_set = set([173, 847, 6150, 6155, 6156, 6157, 8203, 8204, 8205, 8288, 65279] + list(range(65024,65040)))
def in_table_b1(code):
return ord(code) in b1_set
b3_exceptions = {
0xb5:'\u03bc', 0xdf:'ss', 0x130:'i\u0307', 0x149:'\u02bcn',
0x17f:'s', 0x1f0:'j\u030c', 0x345:'\u03b9', 0x37a:' \u03b9',
0x390:'\u03b9\u0308\u0301', 0x3b0:'\u03c5\u0308\u0301', 0x3c2:'\u03c3', 0x3d0:'\u03b2',
0x3d1:'\u03b8', 0x3d2:'\u03c5', 0x3d3:'\u03cd', 0x3d4:'\u03cb',
0x3d5:'\u03c6', 0x3d6:'\u03c0', 0x3f0:'\u03ba', 0x3f1:'\u03c1',
0x3f2:'\u03c3', 0x3f5:'\u03b5', 0x587:'\u0565\u0582', 0x1e96:'h\u0331',
0x1e97:'t\u0308', 0x1e98:'w\u030a', 0x1e99:'y\u030a', 0x1e9a:'a\u02be',
0x1e9b:'\u1e61', 0x1f50:'\u03c5\u0313', 0x1f52:'\u03c5\u0313\u0300', 0x1f54:'\u03c5\u0313\u0301',
0x1f56:'\u03c5\u0313\u0342', 0x1f80:'\u1f00\u03b9', 0x1f81:'\u1f01\u03b9', 0x1f82:'\u1f02\u03b9',
0x1f83:'\u1f03\u03b9', 0x1f84:'\u1f04\u03b9', 0x1f85:'\u1f05\u03b9', 0x1f86:'\u1f06\u03b9',
0x1f87:'\u1f07\u03b9', 0x1f88:'\u1f00\u03b9', 0x1f89:'\u1f01\u03b9', 0x1f8a:'\u1f02\u03b9',
0x1f8b:'\u1f03\u03b9', 0x1f8c:'\u1f04\u03b9', 0x1f8d:'\u1f05\u03b9', 0x1f8e:'\u1f06\u03b9',
0x1f8f:'\u1f07\u03b9', 0x1f90:'\u1f20\u03b9', 0x1f91:'\u1f21\u03b9', 0x1f92:'\u1f22\u03b9',
0x1f93:'\u1f23\u03b9', 0x1f94:'\u1f24\u03b9', 0x1f95:'\u1f25\u03b9', 0x1f96:'\u1f26\u03b9',
0x1f97:'\u1f27\u03b9', 0x1f98:'\u1f20\u03b9', 0x1f99:'\u1f21\u03b9', 0x1f9a:'\u1f22\u03b9',
0x1f9b:'\u1f23\u03b9', 0x1f9c:'\u1f24\u03b9', 0x1f9d:'\u1f25\u03b9', 0x1f9e:'\u1f26\u03b9',
0x1f9f:'\u1f27\u03b9', 0x1fa0:'\u1f60\u03b9', 0x1fa1:'\u1f61\u03b9', 0x1fa2:'\u1f62\u03b9',
0x1fa3:'\u1f63\u03b9', 0x1fa4:'\u1f64\u03b9', 0x1fa5:'\u1f65\u03b9', 0x1fa6:'\u1f66\u03b9',
0x1fa7:'\u1f67\u03b9', 0x1fa8:'\u1f60\u03b9', 0x1fa9:'\u1f61\u03b9', 0x1faa:'\u1f62\u03b9',
0x1fab:'\u1f63\u03b9', 0x1fac:'\u1f64\u03b9', 0x1fad:'\u1f65\u03b9', 0x1fae:'\u1f66\u03b9',
0x1faf:'\u1f67\u03b9', 0x1fb2:'\u1f70\u03b9', 0x1fb3:'\u03b1\u03b9', 0x1fb4:'\u03ac\u03b9',
0x1fb6:'\u03b1\u0342', 0x1fb7:'\u03b1\u0342\u03b9', 0x1fbc:'\u03b1\u03b9', 0x1fbe:'\u03b9',
0x1fc2:'\u1f74\u03b9', 0x1fc3:'\u03b7\u03b9', 0x1fc4:'\u03ae\u03b9', 0x1fc6:'\u03b7\u0342',
0x1fc7:'\u03b7\u0342\u03b9', 0x1fcc:'\u03b7\u03b9', 0x1fd2:'\u03b9\u0308\u0300', 0x1fd3:'\u03b9\u0308\u0301',
0x1fd6:'\u03b9\u0342', 0x1fd7:'\u03b9\u0308\u0342', 0x1fe2:'\u03c5\u0308\u0300', 0x1fe3:'\u03c5\u0308\u0301',
0x1fe4:'\u03c1\u0313', 0x1fe6:'\u03c5\u0342', 0x1fe7:'\u03c5\u0308\u0342', 0x1ff2:'\u1f7c\u03b9',
0x1ff3:'\u03c9\u03b9', 0x1ff4:'\u03ce\u03b9', 0x1ff6:'\u03c9\u0342', 0x1ff7:'\u03c9\u0342\u03b9',
0x1ffc:'\u03c9\u03b9', 0x20a8:'rs', 0x2102:'c', 0x2103:'\xb0c',
0x2107:'\u025b', 0x2109:'\xb0f', 0x210b:'h', 0x210c:'h',
0x210d:'h', 0x2110:'i', 0x2111:'i', 0x2112:'l',
0x2115:'n', 0x2116:'no', 0x2119:'p', 0x211a:'q',
0x211b:'r', 0x211c:'r', 0x211d:'r', 0x2120:'sm',
0x2121:'tel', 0x2122:'tm', 0x2124:'z', 0x2128:'z',
0x212c:'b', 0x212d:'c', 0x2130:'e', 0x2131:'f',
0x2133:'m', 0x213e:'\u03b3', 0x213f:'\u03c0', 0x2145:'d',
0x3371:'hpa', 0x3373:'au', 0x3375:'ov', 0x3380:'pa',
0x3381:'na', 0x3382:'\u03bca', 0x3383:'ma', 0x3384:'ka',
0x3385:'kb', 0x3386:'mb', 0x3387:'gb', 0x338a:'pf',
0x338b:'nf', 0x338c:'\u03bcf', 0x3390:'hz', 0x3391:'khz',
0x3392:'mhz', 0x3393:'ghz', 0x3394:'thz', 0x33a9:'pa',
0x33aa:'kpa', 0x33ab:'mpa', 0x33ac:'gpa', 0x33b4:'pv',
0x33b5:'nv', 0x33b6:'\u03bcv', 0x33b7:'mv', 0x33b8:'kv',
0x33b9:'mv', 0x33ba:'pw', 0x33bb:'nw', 0x33bc:'\u03bcw',
0x33bd:'mw', 0x33be:'kw', 0x33bf:'mw', 0x33c0:'k\u03c9',
0x33c1:'m\u03c9', 0x33c3:'bq', 0x33c6:'c\u2215kg', 0x33c7:'co.',
0x33c8:'db', 0x33c9:'gy', 0x33cb:'hp', 0x33cd:'kk',
0x33ce:'km', 0x33d7:'ph', 0x33d9:'ppm', 0x33da:'pr',
0x33dc:'sv', 0x33dd:'wb', 0xfb00:'ff', 0xfb01:'fi',
0xfb02:'fl', 0xfb03:'ffi', 0xfb04:'ffl', 0xfb05:'st',
0xfb06:'st', 0xfb13:'\u0574\u0576', 0xfb14:'\u0574\u0565', 0xfb15:'\u0574\u056b',
0xfb16:'\u057e\u0576', 0xfb17:'\u0574\u056d', 0x1d400:'a', 0x1d401:'b',
0x1d402:'c', 0x1d403:'d', 0x1d404:'e', 0x1d405:'f',
0x1d406:'g', 0x1d407:'h', 0x1d408:'i', 0x1d409:'j',
0x1d40a:'k', 0x1d40b:'l', 0x1d40c:'m', 0x1d40d:'n',
0x1d40e:'o', 0x1d40f:'p', 0x1d410:'q', 0x1d411:'r',
0x1d412:'s', 0x1d413:'t', 0x1d414:'u', 0x1d415:'v',
0x1d416:'w', 0x1d417:'x', 0x1d418:'y', 0x1d419:'z',
0x1d434:'a', 0x1d435:'b', 0x1d436:'c', 0x1d437:'d',
0x1d438:'e', 0x1d439:'f', 0x1d43a:'g', 0x1d43b:'h',
0x1d43c:'i', 0x1d43d:'j', 0x1d43e:'k', 0x1d43f:'l',
0x1d440:'m', 0x1d441:'n', 0x1d442:'o', 0x1d443:'p',
0x1d444:'q', 0x1d445:'r', 0x1d446:'s', 0x1d447:'t',
0x1d448:'u', 0x1d449:'v', 0x1d44a:'w', 0x1d44b:'x',
0x1d44c:'y', 0x1d44d:'z', 0x1d468:'a', 0x1d469:'b',
0x1d46a:'c', 0x1d46b:'d', 0x1d46c:'e', 0x1d46d:'f',
0x1d46e:'g', 0x1d46f:'h', 0x1d470:'i', 0x1d471:'j',
0x1d472:'k', 0x1d473:'l', 0x1d474:'m', 0x1d475:'n',
0x1d476:'o', 0x1d477:'p', 0x1d478:'q', 0x1d479:'r',
0x1d47a:'s', 0x1d47b:'t', 0x1d47c:'u', 0x1d47d:'v',
0x1d47e:'w', 0x1d47f:'x', 0x1d480:'y', 0x1d481:'z',
0x1d49c:'a', 0x1d49e:'c', 0x1d49f:'d', 0x1d4a2:'g',
0x1d4a5:'j', 0x1d4a6:'k', 0x1d4a9:'n', 0x1d4aa:'o',
0x1d4ab:'p', 0x1d4ac:'q', 0x1d4ae:'s', 0x1d4af:'t',
0x1d4b0:'u', 0x1d4b1:'v', 0x1d4b2:'w', 0x1d4b3:'x',
0x1d4b4:'y', 0x1d4b5:'z', 0x1d4d0:'a', 0x1d4d1:'b',
0x1d4d2:'c', 0x1d4d3:'d', 0x1d4d4:'e', 0x1d4d5:'f',
0x1d4d6:'g', 0x1d4d7:'h', 0x1d4d8:'i', 0x1d4d9:'j',
0x1d4da:'k', 0x1d4db:'l', 0x1d4dc:'m', 0x1d4dd:'n',
0x1d4de:'o', 0x1d4df:'p', 0x1d4e0:'q', 0x1d4e1:'r',
0x1d4e2:'s', 0x1d4e3:'t', 0x1d4e4:'u', 0x1d4e5:'v',
0x1d4e6:'w', 0x1d4e7:'x', 0x1d4e8:'y', 0x1d4e9:'z',
0x1d504:'a', 0x1d505:'b', 0x1d507:'d', 0x1d508:'e',
0x1d509:'f', 0x1d50a:'g', 0x1d50d:'j', 0x1d50e:'k',
0x1d50f:'l', 0x1d510:'m', 0x1d511:'n', 0x1d512:'o',
0x1d513:'p', 0x1d514:'q', 0x1d516:'s', 0x1d517:'t',
0x1d518:'u', 0x1d519:'v', 0x1d51a:'w', 0x1d51b:'x',
0x1d51c:'y', 0x1d538:'a', 0x1d539:'b', 0x1d53b:'d',
0x1d53c:'e', 0x1d53d:'f', 0x1d53e:'g', 0x1d540:'i',
0x1d541:'j', 0x1d542:'k', 0x1d543:'l', 0x1d544:'m',
0x1d546:'o', 0x1d54a:'s', 0x1d54b:'t', 0x1d54c:'u',
0x1d54d:'v', 0x1d54e:'w', 0x1d54f:'x', 0x1d550:'y',
0x1d56c:'a', 0x1d56d:'b', 0x1d56e:'c', 0x1d56f:'d',
0x1d570:'e', 0x1d571:'f', 0x1d572:'g', 0x1d573:'h',
0x1d574:'i', 0x1d575:'j', 0x1d576:'k', 0x1d577:'l',
0x1d578:'m', 0x1d579:'n', 0x1d57a:'o', 0x1d57b:'p',
0x1d57c:'q', 0x1d57d:'r', 0x1d57e:'s', 0x1d57f:'t',
0x1d580:'u', 0x1d581:'v', 0x1d582:'w', 0x1d583:'x',
0x1d584:'y', 0x1d585:'z', 0x1d5a0:'a', 0x1d5a1:'b',
0x1d5a2:'c', 0x1d5a3:'d', 0x1d5a4:'e', 0x1d5a5:'f',
0x1d5a6:'g', 0x1d5a7:'h', 0x1d5a8:'i', 0x1d5a9:'j',
0x1d5aa:'k', 0x1d5ab:'l', 0x1d5ac:'m', 0x1d5ad:'n',
0x1d5ae:'o', 0x1d5af:'p', 0x1d5b0:'q', 0x1d5b1:'r',
0x1d5b2:'s', 0x1d5b3:'t', 0x1d5b4:'u', 0x1d5b5:'v',
0x1d5b6:'w', 0x1d5b7:'x', 0x1d5b8:'y', 0x1d5b9:'z',
0x1d5d4:'a', 0x1d5d5:'b', 0x1d5d6:'c', 0x1d5d7:'d',
0x1d5d8:'e', 0x1d5d9:'f', 0x1d5da:'g', 0x1d5db:'h',
0x1d5dc:'i', 0x1d5dd:'j', 0x1d5de:'k', 0x1d5df:'l',
0x1d5e0:'m', 0x1d5e1:'n', 0x1d5e2:'o', 0x1d5e3:'p',
0x1d5e4:'q', 0x1d5e5:'r', 0x1d5e6:'s', 0x1d5e7:'t',
0x1d5e8:'u', 0x1d5e9:'v', 0x1d5ea:'w', 0x1d5eb:'x',
0x1d5ec:'y', 0x1d5ed:'z', 0x1d608:'a', 0x1d609:'b',
0x1d60a:'c', 0x1d60b:'d', 0x1d60c:'e', 0x1d60d:'f',
0x1d60e:'g', 0x1d60f:'h', 0x1d610:'i', 0x1d611:'j',
0x1d612:'k', 0x1d613:'l', 0x1d614:'m', 0x1d615:'n',
0x1d616:'o', 0x1d617:'p', 0x1d618:'q', 0x1d619:'r',
0x1d61a:'s', 0x1d61b:'t', 0x1d61c:'u', 0x1d61d:'v',
0x1d61e:'w', 0x1d61f:'x', 0x1d620:'y', 0x1d621:'z',
0x1d63c:'a', 0x1d63d:'b', 0x1d63e:'c', 0x1d63f:'d',
0x1d640:'e', 0x1d641:'f', 0x1d642:'g', 0x1d643:'h',
0x1d644:'i', 0x1d645:'j', 0x1d646:'k', 0x1d647:'l',
0x1d648:'m', 0x1d649:'n', 0x1d64a:'o', 0x1d64b:'p',
0x1d64c:'q', 0x1d64d:'r', 0x1d64e:'s', 0x1d64f:'t',
0x1d650:'u', 0x1d651:'v', 0x1d652:'w', 0x1d653:'x',
0x1d654:'y', 0x1d655:'z', 0x1d670:'a', 0x1d671:'b',
0x1d672:'c', 0x1d673:'d', 0x1d674:'e', 0x1d675:'f',
0x1d676:'g', 0x1d677:'h', 0x1d678:'i', 0x1d679:'j',
0x1d67a:'k', 0x1d67b:'l', 0x1d67c:'m', 0x1d67d:'n',
0x1d67e:'o', 0x1d67f:'p', 0x1d680:'q', 0x1d681:'r',
0x1d682:'s', 0x1d683:'t', 0x1d684:'u', 0x1d685:'v',
0x1d686:'w', 0x1d687:'x', 0x1d688:'y', 0x1d689:'z',
0x1d6a8:'\u03b1', 0x1d6a9:'\u03b2', 0x1d6aa:'\u03b3', 0x1d6ab:'\u03b4',
0x1d6ac:'\u03b5', 0x1d6ad:'\u03b6', 0x1d6ae:'\u03b7', 0x1d6af:'\u03b8',
0x1d6b0:'\u03b9', 0x1d6b1:'\u03ba', 0x1d6b2:'\u03bb', 0x1d6b3:'\u03bc',
0x1d6b4:'\u03bd', 0x1d6b5:'\u03be', 0x1d6b6:'\u03bf', 0x1d6b7:'\u03c0',
0x1d6b8:'\u03c1', 0x1d6b9:'\u03b8', 0x1d6ba:'\u03c3', 0x1d6bb:'\u03c4',
0x1d6bc:'\u03c5', 0x1d6bd:'\u03c6', 0x1d6be:'\u03c7', 0x1d6bf:'\u03c8',
0x1d6c0:'\u03c9', 0x1d6d3:'\u03c3', 0x1d6e2:'\u03b1', 0x1d6e3:'\u03b2',
0x1d6e4:'\u03b3', 0x1d6e5:'\u03b4', 0x1d6e6:'\u03b5', 0x1d6e7:'\u03b6',
0x1d6e8:'\u03b7', 0x1d6e9:'\u03b8', 0x1d6ea:'\u03b9', 0x1d6eb:'\u03ba',
0x1d6ec:'\u03bb', 0x1d6ed:'\u03bc', 0x1d6ee:'\u03bd', 0x1d6ef:'\u03be',
0x1d6f0:'\u03bf', 0x1d6f1:'\u03c0', 0x1d6f2:'\u03c1', 0x1d6f3:'\u03b8',
0x1d6f4:'\u03c3', 0x1d6f5:'\u03c4', 0x1d6f6:'\u03c5', 0x1d6f7:'\u03c6',
0x1d6f8:'\u03c7', 0x1d6f9:'\u03c8', 0x1d6fa:'\u03c9', 0x1d70d:'\u03c3',
0x1d71c:'\u03b1', 0x1d71d:'\u03b2', 0x1d71e:'\u03b3', 0x1d71f:'\u03b4',
0x1d720:'\u03b5', 0x1d721:'\u03b6', 0x1d722:'\u03b7', 0x1d723:'\u03b8',
0x1d724:'\u03b9', 0x1d725:'\u03ba', 0x1d726:'\u03bb', 0x1d727:'\u03bc',
0x1d728:'\u03bd', 0x1d729:'\u03be', 0x1d72a:'\u03bf', 0x1d72b:'\u03c0',
0x1d72c:'\u03c1', 0x1d72d:'\u03b8', 0x1d72e:'\u03c3', 0x1d72f:'\u03c4',
0x1d730:'\u03c5', 0x1d731:'\u03c6', 0x1d732:'\u03c7', 0x1d733:'\u03c8',
0x1d734:'\u03c9', 0x1d747:'\u03c3', 0x1d756:'\u03b1', 0x1d757:'\u03b2',
0x1d758:'\u03b3', 0x1d759:'\u03b4', 0x1d75a:'\u03b5', 0x1d75b:'\u03b6',
0x1d75c:'\u03b7', 0x1d75d:'\u03b8', 0x1d75e:'\u03b9', 0x1d75f:'\u03ba',
0x1d760:'\u03bb', 0x1d761:'\u03bc', 0x1d762:'\u03bd', 0x1d763:'\u03be',
0x1d764:'\u03bf', 0x1d765:'\u03c0', 0x1d766:'\u03c1', 0x1d767:'\u03b8',
0x1d768:'\u03c3', 0x1d769:'\u03c4', 0x1d76a:'\u03c5', 0x1d76b:'\u03c6',
0x1d76c:'\u03c7', 0x1d76d:'\u03c8', 0x1d76e:'\u03c9', 0x1d781:'\u03c3',
0x1d790:'\u03b1', 0x1d791:'\u03b2', 0x1d792:'\u03b3', 0x1d793:'\u03b4',
0x1d794:'\u03b5', 0x1d795:'\u03b6', 0x1d796:'\u03b7', 0x1d797:'\u03b8',
0x1d798:'\u03b9', 0x1d799:'\u03ba', 0x1d79a:'\u03bb', 0x1d79b:'\u03bc',
0x1d79c:'\u03bd', 0x1d79d:'\u03be', 0x1d79e:'\u03bf', 0x1d79f:'\u03c0',
0x1d7a0:'\u03c1', 0x1d7a1:'\u03b8', 0x1d7a2:'\u03c3', 0x1d7a3:'\u03c4',
0x1d7a4:'\u03c5', 0x1d7a5:'\u03c6', 0x1d7a6:'\u03c7', 0x1d7a7:'\u03c8',
0x1d7a8:'\u03c9', 0x1d7bb:'\u03c3', }
def map_table_b3(code):
r = b3_exceptions.get(ord(code))
if r is not None: return r
return code.lower()
def map_table_b2(a):
al = map_table_b3(a)
b = unicodedata.normalize("NFKC", al)
bl = "".join([map_table_b3(ch) for ch in b])
c = unicodedata.normalize("NFKC", bl)
if b != c:
return c
else:
return al
def in_table_c11(code):
return code == " "
def in_table_c12(code):
return unicodedata.category(code) == "Zs" and code != " "
def in_table_c11_c12(code):
return unicodedata.category(code) == "Zs"
def in_table_c21(code):
return ord(code) < 128 and unicodedata.category(code) == "Cc"
c22_specials = set([1757, 1807, 6158, 8204, 8205, 8232, 8233, 65279] + list(range(8288,8292)) + list(range(8298,8304)) + list(range(65529,65533)) + list(range(119155,119163)))
def in_table_c22(code):
c = ord(code)
if c < 128: return False
if unicodedata.category(code) == "Cc": return True
return c in c22_specials
def in_table_c21_c22(code):
return unicodedata.category(code) == "Cc" or \
ord(code) in c22_specials
def in_table_c3(code):
return unicodedata.category(code) == "Co"
def in_table_c4(code):
c = ord(code)
if c < 0xFDD0: return False
if c < 0xFDF0: return True
return (ord(code) & 0xFFFF) in (0xFFFE, 0xFFFF)
def in_table_c5(code):
return unicodedata.category(code) == "Cs"
c6_set = set(range(65529,65534))
def in_table_c6(code):
return ord(code) in c6_set
c7_set = set(range(12272,12284))
def in_table_c7(code):
return ord(code) in c7_set
c8_set = set([832, 833, 8206, 8207] + list(range(8234,8239)) + list(range(8298,8304)))
def in_table_c8(code):
return ord(code) in c8_set
c9_set = set([917505] + list(range(917536,917632)))
def in_table_c9(code):
return ord(code) in c9_set
def in_table_d1(code):
return unicodedata.bidirectional(code) in ("R","AL")
def in_table_d2(code):
return unicodedata.bidirectional(code) == "L"

0
Lib/urllib/__init__.py Normal file
View File

81
Lib/urllib/error.py Normal file
View File

@@ -0,0 +1,81 @@
"""Exception classes raised by urllib.
The base exception class is URLError, which inherits from OSError. It
doesn't define any behavior of its own, but is the base class for all
exceptions defined in this package.
HTTPError is an exception class that is also a valid HTTP response
instance. It behaves this way because HTTP protocol errors are valid
responses, with a status code, headers, and a body. In some contexts,
an application may want to handle an exception like a regular
response.
"""
import urllib.response
__all__ = ['URLError', 'HTTPError', 'ContentTooShortError']
# do these error classes make sense?
# make sure all of the OSError stuff is overridden. we just want to be
# subtypes.
class URLError(OSError):
# URLError is a sub-type of OSError, but it doesn't share any of
# the implementation. need to override __init__ and __str__.
# It sets self.args for compatibility with other EnvironmentError
# subclasses, but args doesn't have the typical format with errno in
# slot 0 and strerror in slot 1. This may be better than nothing.
def __init__(self, reason, filename=None):
self.args = reason,
self.reason = reason
if filename is not None:
self.filename = filename
def __str__(self):
return '<urlopen error %s>' % self.reason
class HTTPError(URLError, urllib.response.addinfourl):
"""Raised when HTTP error occurs, but also acts like non-error return"""
__super_init = urllib.response.addinfourl.__init__
def __init__(self, url, code, msg, hdrs, fp):
self.code = code
self.msg = msg
self.hdrs = hdrs
self.fp = fp
self.filename = url
# The addinfourl classes depend on fp being a valid file
# object. In some cases, the HTTPError may not have a valid
# file object. If this happens, the simplest workaround is to
# not initialize the base classes.
if fp is not None:
self.__super_init(fp, hdrs, url, code)
def __str__(self):
return 'HTTP Error %s: %s' % (self.code, self.msg)
def __repr__(self):
return '<HTTPError %s: %r>' % (self.code, self.msg)
# since URLError specifies a .reason attribute, HTTPError should also
# provide this attribute. See issue13211 for discussion.
@property
def reason(self):
return self.msg
@property
def headers(self):
return self.hdrs
@headers.setter
def headers(self, headers):
self.hdrs = headers
class ContentTooShortError(URLError):
"""Exception raised when downloaded size does not match content-length."""
def __init__(self, message, content):
URLError.__init__(self, message)
self.content = content

1009
Lib/urllib/parse.py Normal file

File diff suppressed because it is too large Load Diff

2743
Lib/urllib/request.py Normal file

File diff suppressed because it is too large Load Diff

80
Lib/urllib/response.py Normal file
View File

@@ -0,0 +1,80 @@
"""Response classes used by urllib.
The base class, addbase, defines a minimal file-like interface,
including read() and readline(). The typical response object is an
addinfourl instance, which defines an info() method that returns
headers and a geturl() method that returns the url.
"""
import tempfile
__all__ = ['addbase', 'addclosehook', 'addinfo', 'addinfourl']
class addbase(tempfile._TemporaryFileWrapper):
"""Base class for addinfo and addclosehook. Is a good idea for garbage collection."""
# XXX Add a method to expose the timeout on the underlying socket?
def __init__(self, fp):
super(addbase, self).__init__(fp, '<urllib response>', delete=False)
# Keep reference around as this was part of the original API.
self.fp = fp
def __repr__(self):
return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
id(self), self.file)
def __enter__(self):
if self.fp.closed:
raise ValueError("I/O operation on closed file")
return self
def __exit__(self, type, value, traceback):
self.close()
class addclosehook(addbase):
"""Class to add a close hook to an open file."""
def __init__(self, fp, closehook, *hookargs):
super(addclosehook, self).__init__(fp)
self.closehook = closehook
self.hookargs = hookargs
def close(self):
try:
closehook = self.closehook
hookargs = self.hookargs
if closehook:
self.closehook = None
self.hookargs = None
closehook(*hookargs)
finally:
super(addclosehook, self).close()
class addinfo(addbase):
"""class to add an info() method to an open file."""
def __init__(self, fp, headers):
super(addinfo, self).__init__(fp)
self.headers = headers
def info(self):
return self.headers
class addinfourl(addinfo):
"""class to add info() and geturl() methods to an open file."""
def __init__(self, fp, headers, url, code=None):
super(addinfourl, self).__init__(fp, headers)
self.url = url
self.code = code
def getcode(self):
return self.code
def geturl(self):
return self.url

251
Lib/urllib/robotparser.py Normal file
View File

@@ -0,0 +1,251 @@
""" robotparser.py
Copyright (C) 2000 Bastian Kleineidam
You can choose between two licenses when using this package:
1) GNU GPLv2
2) PSF license for Python 2.2
The robots.txt Exclusion Protocol is implemented as specified in
http://www.robotstxt.org/norobots-rfc.txt
"""
import collections
import urllib.parse
import urllib.request
__all__ = ["RobotFileParser"]
class RobotFileParser:
""" This class provides a set of methods to read, parse and answer
questions about a single robots.txt file.
"""
def __init__(self, url=''):
self.entries = []
self.default_entry = None
self.disallow_all = False
self.allow_all = False
self.set_url(url)
self.last_checked = 0
def mtime(self):
"""Returns the time the robots.txt file was last fetched.
This is useful for long-running web spiders that need to
check for new robots.txt files periodically.
"""
return self.last_checked
def modified(self):
"""Sets the time the robots.txt file was last fetched to the
current time.
"""
import time
self.last_checked = time.time()
def set_url(self, url):
"""Sets the URL referring to a robots.txt file."""
self.url = url
self.host, self.path = urllib.parse.urlparse(url)[1:3]
def read(self):
"""Reads the robots.txt URL and feeds it to the parser."""
try:
f = urllib.request.urlopen(self.url)
except urllib.error.HTTPError as err:
if err.code in (401, 403):
self.disallow_all = True
elif err.code >= 400 and err.code < 500:
self.allow_all = True
else:
raw = f.read()
self.parse(raw.decode("utf-8").splitlines())
def _add_entry(self, entry):
if "*" in entry.useragents:
# the default entry is considered last
if self.default_entry is None:
# the first default entry wins
self.default_entry = entry
else:
self.entries.append(entry)
def parse(self, lines):
"""Parse the input lines from a robots.txt file.
We allow that a user-agent: line is not preceded by
one or more blank lines.
"""
# states:
# 0: start state
# 1: saw user-agent line
# 2: saw an allow or disallow line
state = 0
entry = Entry()
self.modified()
for line in lines:
if not line:
if state == 1:
entry = Entry()
state = 0
elif state == 2:
self._add_entry(entry)
entry = Entry()
state = 0
# remove optional comment and strip line
i = line.find('#')
if i >= 0:
line = line[:i]
line = line.strip()
if not line:
continue
line = line.split(':', 1)
if len(line) == 2:
line[0] = line[0].strip().lower()
line[1] = urllib.parse.unquote(line[1].strip())
if line[0] == "user-agent":
if state == 2:
self._add_entry(entry)
entry = Entry()
entry.useragents.append(line[1])
state = 1
elif line[0] == "disallow":
if state != 0:
entry.rulelines.append(RuleLine(line[1], False))
state = 2
elif line[0] == "allow":
if state != 0:
entry.rulelines.append(RuleLine(line[1], True))
state = 2
elif line[0] == "crawl-delay":
if state != 0:
# before trying to convert to int we need to make
# sure that robots.txt has valid syntax otherwise
# it will crash
if line[1].strip().isdigit():
entry.delay = int(line[1])
state = 2
elif line[0] == "request-rate":
if state != 0:
numbers = line[1].split('/')
# check if all values are sane
if (len(numbers) == 2 and numbers[0].strip().isdigit()
and numbers[1].strip().isdigit()):
req_rate = collections.namedtuple('req_rate',
'requests seconds')
entry.req_rate = req_rate
entry.req_rate.requests = int(numbers[0])
entry.req_rate.seconds = int(numbers[1])
state = 2
if state == 2:
self._add_entry(entry)
def can_fetch(self, useragent, url):
"""using the parsed robots.txt decide if useragent can fetch url"""
if self.disallow_all:
return False
if self.allow_all:
return True
# Until the robots.txt file has been read or found not
# to exist, we must assume that no url is allowable.
# This prevents false positives when a user erroneously
# calls can_fetch() before calling read().
if not self.last_checked:
return False
# search for given user agent matches
# the first match counts
parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
url = urllib.parse.urlunparse(('','',parsed_url.path,
parsed_url.params,parsed_url.query, parsed_url.fragment))
url = urllib.parse.quote(url)
if not url:
url = "/"
for entry in self.entries:
if entry.applies_to(useragent):
return entry.allowance(url)
# try the default entry last
if self.default_entry:
return self.default_entry.allowance(url)
# agent not found ==> access granted
return True
def crawl_delay(self, useragent):
if not self.mtime():
return None
for entry in self.entries:
if entry.applies_to(useragent):
return entry.delay
return self.default_entry.delay
def request_rate(self, useragent):
if not self.mtime():
return None
for entry in self.entries:
if entry.applies_to(useragent):
return entry.req_rate
return self.default_entry.req_rate
def __str__(self):
return ''.join([str(entry) + "\n" for entry in self.entries])
class RuleLine:
"""A rule line is a single "Allow:" (allowance==True) or "Disallow:"
(allowance==False) followed by a path."""
def __init__(self, path, allowance):
if path == '' and not allowance:
# an empty value means allow all
allowance = True
path = urllib.parse.urlunparse(urllib.parse.urlparse(path))
self.path = urllib.parse.quote(path)
self.allowance = allowance
def applies_to(self, filename):
return self.path == "*" or filename.startswith(self.path)
def __str__(self):
return ("Allow" if self.allowance else "Disallow") + ": " + self.path
class Entry:
"""An entry has one or more user-agents and zero or more rulelines"""
def __init__(self):
self.useragents = []
self.rulelines = []
self.delay = None
self.req_rate = None
def __str__(self):
ret = []
for agent in self.useragents:
ret.extend(["User-agent: ", agent, "\n"])
for line in self.rulelines:
ret.extend([str(line), "\n"])
return ''.join(ret)
def applies_to(self, useragent):
"""check if this entry applies to the specified agent"""
# split the name token and make it lower case
useragent = useragent.split("/")[0].lower()
for agent in self.useragents:
if agent == '*':
# we have the catch-all agent
return True
agent = agent.lower()
if agent in useragent:
return True
return False
def allowance(self, filename):
"""Preconditions:
- our agent applies to this entry
- filename is URL decoded"""
for line in self.rulelines:
if line.applies_to(filename):
return line.allowance
return True

199
Lib/uu.py vendored Executable file
View File

@@ -0,0 +1,199 @@
#! /usr/bin/env python3
# Copyright 1994 by Lance Ellinghouse
# Cathedral City, California Republic, United States of America.
# All Rights Reserved
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose and without fee is hereby granted,
# provided that the above copyright notice appear in all copies and that
# both that copyright notice and this permission notice appear in
# supporting documentation, and that the name of Lance Ellinghouse
# not be used in advertising or publicity pertaining to distribution
# of the software without specific, written prior permission.
# LANCE ELLINGHOUSE DISCLAIMS ALL WARRANTIES WITH REGARD TO
# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS, IN NO EVENT SHALL LANCE ELLINGHOUSE CENTRUM BE LIABLE
# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# Modified by Jack Jansen, CWI, July 1995:
# - Use binascii module to do the actual line-by-line conversion
# between ascii and binary. This results in a 1000-fold speedup. The C
# version is still 5 times faster, though.
# - Arguments more compliant with python standard
"""Implementation of the UUencode and UUdecode functions.
encode(in_file, out_file [,name, mode])
decode(in_file [, out_file, mode])
"""
import binascii
import os
import sys
__all__ = ["Error", "encode", "decode"]
class Error(Exception):
pass
def encode(in_file, out_file, name=None, mode=None):
"""Uuencode file"""
#
# If in_file is a pathname open it and change defaults
#
opened_files = []
try:
if in_file == '-':
in_file = sys.stdin.buffer
elif isinstance(in_file, str):
if name is None:
name = os.path.basename(in_file)
if mode is None:
try:
mode = os.stat(in_file).st_mode
except AttributeError:
pass
in_file = open(in_file, 'rb')
opened_files.append(in_file)
#
# Open out_file if it is a pathname
#
if out_file == '-':
out_file = sys.stdout.buffer
elif isinstance(out_file, str):
out_file = open(out_file, 'wb')
opened_files.append(out_file)
#
# Set defaults for name and mode
#
if name is None:
name = '-'
if mode is None:
mode = 0o666
#
# Write the data
#
out_file.write(('begin %o %s\n' % ((mode & 0o777), name)).encode("ascii"))
data = in_file.read(45)
while len(data) > 0:
out_file.write(binascii.b2a_uu(data))
data = in_file.read(45)
out_file.write(b' \nend\n')
finally:
for f in opened_files:
f.close()
def decode(in_file, out_file=None, mode=None, quiet=False):
"""Decode uuencoded file"""
#
# Open the input file, if needed.
#
opened_files = []
if in_file == '-':
in_file = sys.stdin.buffer
elif isinstance(in_file, str):
in_file = open(in_file, 'rb')
opened_files.append(in_file)
try:
#
# Read until a begin is encountered or we've exhausted the file
#
while True:
hdr = in_file.readline()
if not hdr:
raise Error('No valid begin line found in input file')
if not hdr.startswith(b'begin'):
continue
hdrfields = hdr.split(b' ', 2)
if len(hdrfields) == 3 and hdrfields[0] == b'begin':
try:
int(hdrfields[1], 8)
break
except ValueError:
pass
if out_file is None:
# If the filename isn't ASCII, what's up with that?!?
out_file = hdrfields[2].rstrip(b' \t\r\n\f').decode("ascii")
if os.path.exists(out_file):
raise Error('Cannot overwrite existing file: %s' % out_file)
if mode is None:
mode = int(hdrfields[1], 8)
#
# Open the output file
#
if out_file == '-':
out_file = sys.stdout.buffer
elif isinstance(out_file, str):
fp = open(out_file, 'wb')
try:
os.path.chmod(out_file, mode)
except AttributeError:
pass
out_file = fp
opened_files.append(out_file)
#
# Main decoding loop
#
s = in_file.readline()
while s and s.strip(b' \t\r\n\f') != b'end':
try:
data = binascii.a2b_uu(s)
except binascii.Error as v:
# Workaround for broken uuencoders by /Fredrik Lundh
nbytes = (((s[0]-32) & 63) * 4 + 5) // 3
data = binascii.a2b_uu(s[:nbytes])
if not quiet:
sys.stderr.write("Warning: %s\n" % v)
out_file.write(data)
s = in_file.readline()
if not s:
raise Error('Truncated input file')
finally:
for f in opened_files:
f.close()
def test():
"""uuencode/uudecode main program"""
import optparse
parser = optparse.OptionParser(usage='usage: %prog [-d] [-t] [input [output]]')
parser.add_option('-d', '--decode', dest='decode', help='Decode (instead of encode)?', default=False, action='store_true')
parser.add_option('-t', '--text', dest='text', help='data is text, encoded format unix-compatible text?', default=False, action='store_true')
(options, args) = parser.parse_args()
if len(args) > 2:
parser.error('incorrect number of arguments')
sys.exit(1)
# Use the binary streams underlying stdin/stdout
input = sys.stdin.buffer
output = sys.stdout.buffer
if len(args) > 0:
input = args[0]
if len(args) > 1:
output = args[1]
if options.decode:
if options.text:
if isinstance(output, str):
output = open(output, 'wb')
else:
print(sys.argv[0], ': cannot do -t to stdout')
sys.exit(1)
decode(input, output)
else:
if options.text:
if isinstance(input, str):
input = open(input, 'rb')
else:
print(sys.argv[0], ': cannot do -t from stdin')
sys.exit(1)
encode(input, output)
if __name__ == '__main__':
test()

615
Lib/uuid.py vendored Normal file
View File

@@ -0,0 +1,615 @@
r"""UUID objects (universally unique identifiers) according to RFC 4122.
This module provides immutable UUID objects (class UUID) and the functions
uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5
UUIDs as specified in RFC 4122.
If all you want is a unique ID, you should probably call uuid1() or uuid4().
Note that uuid1() may compromise privacy since it creates a UUID containing
the computer's network address. uuid4() creates a random UUID.
Typical usage:
>>> import uuid
# make a UUID based on the host ID and current time
>>> uuid.uuid1() # doctest: +SKIP
UUID('a8098c1a-f86e-11da-bd1a-00112444be1e')
# make a UUID using an MD5 hash of a namespace UUID and a name
>>> uuid.uuid3(uuid.NAMESPACE_DNS, 'python.org')
UUID('6fa459ea-ee8a-3ca4-894e-db77e160355e')
# make a random UUID
>>> uuid.uuid4() # doctest: +SKIP
UUID('16fd2706-8baf-433b-82eb-8c7fada847da')
# make a UUID using a SHA-1 hash of a namespace UUID and a name
>>> uuid.uuid5(uuid.NAMESPACE_DNS, 'python.org')
UUID('886313e1-3b8a-5372-9b90-0c9aee199e5d')
# make a UUID from a string of hex digits (braces and hyphens ignored)
>>> x = uuid.UUID('{00010203-0405-0607-0809-0a0b0c0d0e0f}')
# convert a UUID to a string of hex digits in standard form
>>> str(x)
'00010203-0405-0607-0809-0a0b0c0d0e0f'
# get the raw 16 bytes of the UUID
>>> x.bytes
b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
# make a UUID from a 16-byte string
>>> uuid.UUID(bytes=x.bytes)
UUID('00010203-0405-0607-0809-0a0b0c0d0e0f')
"""
import os
__author__ = 'Ka-Ping Yee <ping@zesty.ca>'
RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, RESERVED_FUTURE = [
'reserved for NCS compatibility', 'specified in RFC 4122',
'reserved for Microsoft compatibility', 'reserved for future definition']
int_ = int # The built-in int type
bytes_ = bytes # The built-in bytes type
class UUID(object):
"""Instances of the UUID class represent UUIDs as specified in RFC 4122.
UUID objects are immutable, hashable, and usable as dictionary keys.
Converting a UUID to a string with str() yields something in the form
'12345678-1234-1234-1234-123456789abc'. The UUID constructor accepts
five possible forms: a similar string of hexadecimal digits, or a tuple
of six integer fields (with 32-bit, 16-bit, 16-bit, 8-bit, 8-bit, and
48-bit values respectively) as an argument named 'fields', or a string
of 16 bytes (with all the integer fields in big-endian order) as an
argument named 'bytes', or a string of 16 bytes (with the first three
fields in little-endian order) as an argument named 'bytes_le', or a
single 128-bit integer as an argument named 'int'.
UUIDs have these read-only attributes:
bytes the UUID as a 16-byte string (containing the six
integer fields in big-endian byte order)
bytes_le the UUID as a 16-byte string (with time_low, time_mid,
and time_hi_version in little-endian byte order)
fields a tuple of the six integer fields of the UUID,
which are also available as six individual attributes
and two derived attributes:
time_low the first 32 bits of the UUID
time_mid the next 16 bits of the UUID
time_hi_version the next 16 bits of the UUID
clock_seq_hi_variant the next 8 bits of the UUID
clock_seq_low the next 8 bits of the UUID
node the last 48 bits of the UUID
time the 60-bit timestamp
clock_seq the 14-bit sequence number
hex the UUID as a 32-character hexadecimal string
int the UUID as a 128-bit integer
urn the UUID as a URN as specified in RFC 4122
variant the UUID variant (one of the constants RESERVED_NCS,
RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE)
version the UUID version number (1 through 5, meaningful only
when the variant is RFC_4122)
"""
def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
int=None, version=None):
r"""Create a UUID from either a string of 32 hexadecimal digits,
a string of 16 bytes as the 'bytes' argument, a string of 16 bytes
in little-endian order as the 'bytes_le' argument, a tuple of six
integers (32-bit time_low, 16-bit time_mid, 16-bit time_hi_version,
8-bit clock_seq_hi_variant, 8-bit clock_seq_low, 48-bit node) as
the 'fields' argument, or a single 128-bit integer as the 'int'
argument. When a string of hex digits is given, curly braces,
hyphens, and a URN prefix are all optional. For example, these
expressions all yield the same UUID:
UUID('{12345678-1234-5678-1234-567812345678}')
UUID('12345678123456781234567812345678')
UUID('urn:uuid:12345678-1234-5678-1234-567812345678')
UUID(bytes='\x12\x34\x56\x78'*4)
UUID(bytes_le='\x78\x56\x34\x12\x34\x12\x78\x56' +
'\x12\x34\x56\x78\x12\x34\x56\x78')
UUID(fields=(0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678))
UUID(int=0x12345678123456781234567812345678)
Exactly one of 'hex', 'bytes', 'bytes_le', 'fields', or 'int' must
be given. The 'version' argument is optional; if given, the resulting
UUID will have its variant and version set according to RFC 4122,
overriding the given 'hex', 'bytes', 'bytes_le', 'fields', or 'int'.
"""
if [hex, bytes, bytes_le, fields, int].count(None) != 4:
raise TypeError('one of the hex, bytes, bytes_le, fields, '
'or int arguments must be given')
if hex is not None:
hex = hex.replace('urn:', '').replace('uuid:', '')
hex = hex.strip('{}').replace('-', '')
if len(hex) != 32:
raise ValueError('badly formed hexadecimal UUID string')
int = int_(hex, 16)
if bytes_le is not None:
if len(bytes_le) != 16:
raise ValueError('bytes_le is not a 16-char string')
bytes = (bytes_le[4-1::-1] + bytes_le[6-1:4-1:-1] +
bytes_le[8-1:6-1:-1] + bytes_le[8:])
if bytes is not None:
if len(bytes) != 16:
raise ValueError('bytes is not a 16-char string')
assert isinstance(bytes, bytes_), repr(bytes)
int = int_.from_bytes(bytes, byteorder='big')
if fields is not None:
if len(fields) != 6:
raise ValueError('fields is not a 6-tuple')
(time_low, time_mid, time_hi_version,
clock_seq_hi_variant, clock_seq_low, node) = fields
if not 0 <= time_low < 1<<32:
raise ValueError('field 1 out of range (need a 32-bit value)')
if not 0 <= time_mid < 1<<16:
raise ValueError('field 2 out of range (need a 16-bit value)')
if not 0 <= time_hi_version < 1<<16:
raise ValueError('field 3 out of range (need a 16-bit value)')
if not 0 <= clock_seq_hi_variant < 1<<8:
raise ValueError('field 4 out of range (need an 8-bit value)')
if not 0 <= clock_seq_low < 1<<8:
raise ValueError('field 5 out of range (need an 8-bit value)')
if not 0 <= node < 1<<48:
raise ValueError('field 6 out of range (need a 48-bit value)')
clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low
int = ((time_low << 96) | (time_mid << 80) |
(time_hi_version << 64) | (clock_seq << 48) | node)
if int is not None:
if not 0 <= int < 1<<128:
raise ValueError('int is out of range (need a 128-bit value)')
if version is not None:
if not 1 <= version <= 5:
raise ValueError('illegal version number')
# Set the variant to RFC 4122.
int &= ~(0xc000 << 48)
int |= 0x8000 << 48
# Set the version number.
int &= ~(0xf000 << 64)
int |= version << 76
self.__dict__['int'] = int
def __eq__(self, other):
if isinstance(other, UUID):
return self.int == other.int
return NotImplemented
# Q. What's the value of being able to sort UUIDs?
# A. Use them as keys in a B-Tree or similar mapping.
def __lt__(self, other):
if isinstance(other, UUID):
return self.int < other.int
return NotImplemented
def __gt__(self, other):
if isinstance(other, UUID):
return self.int > other.int
return NotImplemented
def __le__(self, other):
if isinstance(other, UUID):
return self.int <= other.int
return NotImplemented
def __ge__(self, other):
if isinstance(other, UUID):
return self.int >= other.int
return NotImplemented
def __hash__(self):
return hash(self.int)
def __int__(self):
return self.int
def __repr__(self):
return '%s(%r)' % (self.__class__.__name__, str(self))
def __setattr__(self, name, value):
raise TypeError('UUID objects are immutable')
def __str__(self):
hex = '%032x' % self.int
return '%s-%s-%s-%s-%s' % (
hex[:8], hex[8:12], hex[12:16], hex[16:20], hex[20:])
@property
def bytes(self):
return self.int.to_bytes(16, 'big')
@property
def bytes_le(self):
bytes = self.bytes
return (bytes[4-1::-1] + bytes[6-1:4-1:-1] + bytes[8-1:6-1:-1] +
bytes[8:])
@property
def fields(self):
return (self.time_low, self.time_mid, self.time_hi_version,
self.clock_seq_hi_variant, self.clock_seq_low, self.node)
@property
def time_low(self):
return self.int >> 96
@property
def time_mid(self):
return (self.int >> 80) & 0xffff
@property
def time_hi_version(self):
return (self.int >> 64) & 0xffff
@property
def clock_seq_hi_variant(self):
return (self.int >> 56) & 0xff
@property
def clock_seq_low(self):
return (self.int >> 48) & 0xff
@property
def time(self):
return (((self.time_hi_version & 0x0fff) << 48) |
(self.time_mid << 32) | self.time_low)
@property
def clock_seq(self):
return (((self.clock_seq_hi_variant & 0x3f) << 8) |
self.clock_seq_low)
@property
def node(self):
return self.int & 0xffffffffffff
@property
def hex(self):
return '%032x' % self.int
@property
def urn(self):
return 'urn:uuid:' + str(self)
@property
def variant(self):
if not self.int & (0x8000 << 48):
return RESERVED_NCS
elif not self.int & (0x4000 << 48):
return RFC_4122
elif not self.int & (0x2000 << 48):
return RESERVED_MICROSOFT
else:
return RESERVED_FUTURE
@property
def version(self):
# The version bits are only meaningful for RFC 4122 UUIDs.
if self.variant == RFC_4122:
return int((self.int >> 76) & 0xf)
def _popen(command, *args):
import os, shutil, subprocess
executable = shutil.which(command)
if executable is None:
path = os.pathsep.join(('/sbin', '/usr/sbin'))
executable = shutil.which(command, path=path)
if executable is None:
return None
# LC_ALL=C to ensure English output, stderr=DEVNULL to prevent output
# on stderr (Note: we don't have an example where the words we search
# for are actually localized, but in theory some system could do so.)
env = dict(os.environ)
env['LC_ALL'] = 'C'
proc = subprocess.Popen((executable,) + args,
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL,
env=env)
return proc
def _find_mac(command, args, hw_identifiers, get_index):
try:
proc = _popen(command, *args.split())
if not proc:
return
with proc:
for line in proc.stdout:
words = line.lower().rstrip().split()
for i in range(len(words)):
if words[i] in hw_identifiers:
try:
word = words[get_index(i)]
mac = int(word.replace(b':', b''), 16)
if mac:
return mac
except (ValueError, IndexError):
# Virtual interfaces, such as those provided by
# VPNs, do not have a colon-delimited MAC address
# as expected, but a 16-byte HWAddr separated by
# dashes. These should be ignored in favor of a
# real MAC address
pass
except OSError:
pass
def _ifconfig_getnode():
"""Get the hardware address on Unix by running ifconfig."""
# This works on Linux ('' or '-a'), Tru64 ('-av'), but not all Unixes.
for args in ('', '-a', '-av'):
mac = _find_mac('ifconfig', args, [b'hwaddr', b'ether'], lambda i: i+1)
if mac:
return mac
def _ip_getnode():
"""Get the hardware address on Unix by running ip."""
# This works on Linux with iproute2.
mac = _find_mac('ip', 'link list', [b'link/ether'], lambda i: i+1)
if mac:
return mac
def _arp_getnode():
"""Get the hardware address on Unix by running arp."""
import os, socket
try:
ip_addr = socket.gethostbyname(socket.gethostname())
except OSError:
return None
# Try getting the MAC addr from arp based on our IP address (Solaris).
return _find_mac('arp', '-an', [os.fsencode(ip_addr)], lambda i: -1)
def _lanscan_getnode():
"""Get the hardware address on Unix by running lanscan."""
# This might work on HP-UX.
return _find_mac('lanscan', '-ai', [b'lan0'], lambda i: 0)
def _netstat_getnode():
"""Get the hardware address on Unix by running netstat."""
# This might work on AIX, Tru64 UNIX and presumably on IRIX.
try:
proc = _popen('netstat', '-ia')
if not proc:
return
with proc:
words = proc.stdout.readline().rstrip().split()
try:
i = words.index(b'Address')
except ValueError:
return
for line in proc.stdout:
try:
words = line.rstrip().split()
word = words[i]
if len(word) == 17 and word.count(b':') == 5:
mac = int(word.replace(b':', b''), 16)
if mac:
return mac
except (ValueError, IndexError):
pass
except OSError:
pass
def _ipconfig_getnode():
"""Get the hardware address on Windows by running ipconfig.exe."""
import os, re
dirs = ['', r'c:\windows\system32', r'c:\winnt\system32']
try:
import ctypes
buffer = ctypes.create_string_buffer(300)
ctypes.windll.kernel32.GetSystemDirectoryA(buffer, 300)
dirs.insert(0, buffer.value.decode('mbcs'))
except:
pass
for dir in dirs:
try:
pipe = os.popen(os.path.join(dir, 'ipconfig') + ' /all')
except OSError:
continue
with pipe:
for line in pipe:
value = line.split(':')[-1].strip().lower()
if re.match('([0-9a-f][0-9a-f]-){5}[0-9a-f][0-9a-f]', value):
return int(value.replace('-', ''), 16)
def _netbios_getnode():
"""Get the hardware address on Windows using NetBIOS calls.
See http://support.microsoft.com/kb/118623 for details."""
import win32wnet, netbios
ncb = netbios.NCB()
ncb.Command = netbios.NCBENUM
ncb.Buffer = adapters = netbios.LANA_ENUM()
adapters._pack()
if win32wnet.Netbios(ncb) != 0:
return
adapters._unpack()
for i in range(adapters.length):
ncb.Reset()
ncb.Command = netbios.NCBRESET
ncb.Lana_num = ord(adapters.lana[i])
if win32wnet.Netbios(ncb) != 0:
continue
ncb.Reset()
ncb.Command = netbios.NCBASTAT
ncb.Lana_num = ord(adapters.lana[i])
ncb.Callname = '*'.ljust(16)
ncb.Buffer = status = netbios.ADAPTER_STATUS()
if win32wnet.Netbios(ncb) != 0:
continue
status._unpack()
bytes = status.adapter_address[:6]
if len(bytes) != 6:
continue
return int.from_bytes(bytes, 'big')
# Thanks to Thomas Heller for ctypes and for his help with its use here.
# If ctypes is available, use it to find system routines for UUID generation.
# XXX This makes the module non-thread-safe!
_uuid_generate_time = _UuidCreate = None
try:
import ctypes, ctypes.util
import sys
# The uuid_generate_* routines are provided by libuuid on at least
# Linux and FreeBSD, and provided by libc on Mac OS X.
_libnames = ['uuid']
if not sys.platform.startswith('win'):
_libnames.append('c')
for libname in _libnames:
try:
lib = ctypes.CDLL(ctypes.util.find_library(libname))
except Exception:
continue
if hasattr(lib, 'uuid_generate_time'):
_uuid_generate_time = lib.uuid_generate_time
break
del _libnames
# The uuid_generate_* functions are broken on MacOS X 10.5, as noted
# in issue #8621 the function generates the same sequence of values
# in the parent process and all children created using fork (unless
# those children use exec as well).
#
# Assume that the uuid_generate functions are broken from 10.5 onward,
# the test can be adjusted when a later version is fixed.
if sys.platform == 'darwin':
if int(os.uname().release.split('.')[0]) >= 9:
_uuid_generate_time = None
# On Windows prior to 2000, UuidCreate gives a UUID containing the
# hardware address. On Windows 2000 and later, UuidCreate makes a
# random UUID and UuidCreateSequential gives a UUID containing the
# hardware address. These routines are provided by the RPC runtime.
# NOTE: at least on Tim's WinXP Pro SP2 desktop box, while the last
# 6 bytes returned by UuidCreateSequential are fixed, they don't appear
# to bear any relationship to the MAC address of any network device
# on the box.
try:
lib = ctypes.windll.rpcrt4
except:
lib = None
_UuidCreate = getattr(lib, 'UuidCreateSequential',
getattr(lib, 'UuidCreate', None))
except:
pass
def _unixdll_getnode():
"""Get the hardware address on Unix using ctypes."""
_buffer = ctypes.create_string_buffer(16)
_uuid_generate_time(_buffer)
return UUID(bytes=bytes_(_buffer.raw)).node
def _windll_getnode():
"""Get the hardware address on Windows using ctypes."""
_buffer = ctypes.create_string_buffer(16)
if _UuidCreate(_buffer) == 0:
return UUID(bytes=bytes_(_buffer.raw)).node
def _random_getnode():
"""Get a random node ID, with eighth bit set as suggested by RFC 4122."""
import random
return random.getrandbits(48) | 0x010000000000
_node = None
def getnode():
"""Get the hardware address as a 48-bit positive integer.
The first time this runs, it may launch a separate program, which could
be quite slow. If all attempts to obtain the hardware address fail, we
choose a random 48-bit number with its eighth bit set to 1 as recommended
in RFC 4122.
"""
global _node
if _node is not None:
return _node
import sys
if sys.platform == 'win32':
getters = [_windll_getnode, _netbios_getnode, _ipconfig_getnode]
else:
getters = [_unixdll_getnode, _ifconfig_getnode, _ip_getnode,
_arp_getnode, _lanscan_getnode, _netstat_getnode]
for getter in getters + [_random_getnode]:
try:
_node = getter()
except:
continue
if _node is not None:
return _node
_last_timestamp = None
def uuid1(node=None, clock_seq=None):
"""Generate a UUID from a host ID, sequence number, and the current time.
If 'node' is not given, getnode() is used to obtain the hardware
address. If 'clock_seq' is given, it is used as the sequence number;
otherwise a random 14-bit sequence number is chosen."""
# When the system provides a version-1 UUID generator, use it (but don't
# use UuidCreate here because its UUIDs don't conform to RFC 4122).
if _uuid_generate_time and node is clock_seq is None:
_buffer = ctypes.create_string_buffer(16)
_uuid_generate_time(_buffer)
return UUID(bytes=bytes_(_buffer.raw))
global _last_timestamp
import time
nanoseconds = int(time.time() * 1e9)
# 0x01b21dd213814000 is the number of 100-ns intervals between the
# UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
timestamp = int(nanoseconds/100) + 0x01b21dd213814000
if _last_timestamp is not None and timestamp <= _last_timestamp:
timestamp = _last_timestamp + 1
_last_timestamp = timestamp
if clock_seq is None:
import random
clock_seq = random.getrandbits(14) # instead of stable storage
time_low = timestamp & 0xffffffff
time_mid = (timestamp >> 32) & 0xffff
time_hi_version = (timestamp >> 48) & 0x0fff
clock_seq_low = clock_seq & 0xff
clock_seq_hi_variant = (clock_seq >> 8) & 0x3f
if node is None:
node = getnode()
return UUID(fields=(time_low, time_mid, time_hi_version,
clock_seq_hi_variant, clock_seq_low, node), version=1)
def uuid3(namespace, name):
"""Generate a UUID from the MD5 hash of a namespace UUID and a name."""
from hashlib import md5
hash = md5(namespace.bytes + bytes(name, "utf-8")).digest()
return UUID(bytes=hash[:16], version=3)
def uuid4():
"""Generate a random UUID."""
return UUID(bytes=os.urandom(16), version=4)
def uuid5(namespace, name):
"""Generate a UUID from the SHA-1 hash of a namespace UUID and a name."""
from hashlib import sha1
hash = sha1(namespace.bytes + bytes(name, "utf-8")).digest()
return UUID(bytes=hash[:16], version=5)
# The following standard UUIDs are for use with uuid3() or uuid5().
NAMESPACE_DNS = UUID('6ba7b810-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_URL = UUID('6ba7b811-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_OID = UUID('6ba7b812-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_X500 = UUID('6ba7b814-9dad-11d1-80b4-00c04fd430c8')

View File

@@ -85,6 +85,8 @@ impl ByteInnerNewOptions {
vm.new_type_error("string argument without an encoding".to_string())
);
}
i @ PyBytes => Ok(i.get_value().to_vec()),
j @ PyByteArray => Ok(j.inner.borrow().elements.to_vec()),
obj => {
// TODO: only support this method in the bytes() constructor
if let Some(bytes_method) = vm.get_method(obj.clone(), "__bytes__") {

View File

@@ -1183,6 +1183,22 @@ pub trait PyClassImpl: PyClassDef {
}
}
// TODO: find a better place to put this impl
impl TryFromObject for std::time::Duration {
fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<Self> {
use std::time::Duration;
u64::try_from_object(vm, obj.clone())
.map(Duration::from_secs)
.or_else(|_| f64::try_from_object(vm, obj.clone()).map(Duration::from_secs_f64))
.map_err(|_| {
vm.new_type_error(format!(
"expected an int or float for duration, got {}",
obj.class()
))
})
}
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -288,6 +288,13 @@ impl PyBytesIORef {
}
}
fn truncate(self, size: OptionalOption<usize>, vm: &VirtualMachine) -> PyResult<()> {
let mut buffer = self.buffer(vm)?;
let size = size.flat_option().unwrap_or_else(|| buffer.tell() as usize);
buffer.cursor.get_mut().truncate(size);
Ok(())
}
fn closed(self, _vm: &VirtualMachine) -> bool {
self.buffer.borrow().is_none()
}
@@ -1027,6 +1034,7 @@ pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
"getvalue" => ctx.new_rustfunc(PyBytesIORef::getvalue),
"tell" => ctx.new_rustfunc(PyBytesIORef::tell),
"readline" => ctx.new_rustfunc(PyBytesIORef::readline),
"truncate" => ctx.new_rustfunc(PyBytesIORef::truncate),
"closed" => ctx.new_property(PyBytesIORef::closed),
"close" => ctx.new_rustfunc(PyBytesIORef::close),
});

View File

@@ -62,7 +62,7 @@ mod c {
pub use winapi::shared::ws2def::*;
pub use winapi::um::winsock2::{
SD_BOTH as SHUT_RDWR, SD_RECEIVE as SHUT_RD, SD_SEND as SHUT_WR, SOCK_DGRAM, SOCK_RAW,
SOCK_RDM, SOCK_STREAM, *,
SOCK_RDM, SOCK_STREAM, SOL_SOCKET, SO_BROADCAST, SO_REUSEADDR, *,
};
}
@@ -179,10 +179,12 @@ impl PySocket {
#[pymethod]
fn recv(&self, bufsize: usize, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
let mut buffer = vec![0u8; bufsize];
match self.sock.borrow_mut().read_exact(&mut buffer) {
Ok(()) => Ok(buffer),
Err(err) => Err(convert_sock_error(vm, err)),
}
let n = self
.sock()
.recv(&mut buffer)
.map_err(|err| convert_sock_error(vm, err))?;
buffer.truncate(n);
Ok(buffer)
}
#[pymethod]
@@ -196,10 +198,12 @@ impl PySocket {
#[pymethod]
fn recvfrom(&self, bufsize: usize, vm: &VirtualMachine) -> PyResult<(Vec<u8>, AddrTuple)> {
let mut buffer = vec![0u8; bufsize];
match self.sock().recv_from(&mut buffer) {
Ok((_, addr)) => Ok((buffer, get_addr_tuple(addr))),
Err(err) => Err(convert_sock_error(vm, err)),
}
let (n, addr) = self
.sock()
.recv_from(&mut buffer)
.map_err(|err| convert_sock_error(vm, err))?;
buffer.truncate(n);
Ok((buffer, get_addr_tuple(addr)))
}
#[pymethod]
@@ -276,16 +280,68 @@ impl PySocket {
}
#[pymethod]
fn settimeout(&self, timeout: Option<f64>, vm: &VirtualMachine) -> PyResult<()> {
fn settimeout(&self, timeout: Option<Duration>, vm: &VirtualMachine) -> PyResult<()> {
// timeout is None: blocking, no timeout
// timeout is 0: non-blocking, no timeout
// otherwise: timeout is timeout, don't change blocking
let (block, timeout) = match timeout {
None => (Some(true), None),
Some(d) if d == Duration::from_secs(0) => (Some(false), None),
Some(d) => (None, Some(d)),
};
self.sock()
.set_read_timeout(timeout.map(Duration::from_secs_f64))
.set_read_timeout(timeout)
.map_err(|err| convert_sock_error(vm, err))?;
self.sock()
.set_write_timeout(timeout.map(Duration::from_secs_f64))
.set_write_timeout(timeout)
.map_err(|err| convert_sock_error(vm, err))?;
if let Some(blocking) = block {
self.sock()
.set_nonblocking(!blocking)
.map_err(|err| convert_sock_error(vm, err))?;
}
Ok(())
}
#[pymethod]
fn setsockopt(
&self,
level: i32,
name: i32,
value: Option<Either<PyBytesLike, i32>>,
optlen: OptionalArg<u32>,
vm: &VirtualMachine,
) -> PyResult<()> {
let fd = sock_fileno(&self.sock()) as _;
let ret = match (value, optlen) {
(Some(Either::A(b)), OptionalArg::Missing) => b.with_ref(|b| unsafe {
c::setsockopt(fd, level, name, b.as_ptr() as *const _, b.len() as _)
}),
(Some(Either::B(ref val)), OptionalArg::Missing) => unsafe {
c::setsockopt(
fd,
level,
name,
val as *const i32 as *const _,
std::mem::size_of::<i32>() as _,
)
},
(None, OptionalArg::Present(optlen)) => unsafe {
c::setsockopt(fd, level, name, std::ptr::null(), optlen as _)
},
_ => {
return Err(
vm.new_type_error("expected the value arg xor the optlen arg".to_string())
);
}
};
if ret < 0 {
Err(convert_sock_error(vm, io::Error::last_os_error()))
} else {
Ok(())
}
}
#[pymethod]
fn shutdown(&self, how: i32, vm: &VirtualMachine) -> PyResult<()> {
let how = match how {
@@ -570,6 +626,10 @@ pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
"IPPROTO_IPIP" => ctx.new_int(c::IPPROTO_IP),
"IPPROTO_IPV6" => ctx.new_int(c::IPPROTO_IPV6),
"IPPROTO_NONE" => ctx.new_int(c::IPPROTO_NONE),
"SOL_SOCKET" => ctx.new_int(c::SOL_SOCKET),
"SO_REUSEADDR" => ctx.new_int(c::SO_REUSEADDR),
"TCP_NODELAY" => ctx.new_int(c::TCP_NODELAY),
"SO_BROADCAST" => ctx.new_int(c::SO_BROADCAST),
"socket" => PySocket::make_class(ctx),
"inet_aton" => ctx.new_rustfunc(socket_inet_aton),
"inet_ntoa" => ctx.new_rustfunc(socket_inet_ntoa),

View File

@@ -15,9 +15,8 @@ use crate::pyobject::{Either, PyClassImpl, PyObjectRef, PyResult, TryFromObject}
use crate::vm::VirtualMachine;
#[cfg(unix)]
fn time_sleep(seconds: f64, vm: &VirtualMachine) -> PyResult<()> {
// this is basically std::thread::sleep, but that catches interrupts and we don't want to
let dur = Duration::from_secs_f64(seconds);
fn time_sleep(dur: Duration, vm: &VirtualMachine) -> PyResult<()> {
// this is basically std::thread::sleep, but that catches interrupts and we don't want to;
let mut ts = libc::timespec {
tv_sec: std::cmp::min(libc::time_t::max_value() as u64, dur.as_secs()) as libc::time_t,
@@ -34,8 +33,8 @@ fn time_sleep(seconds: f64, vm: &VirtualMachine) -> PyResult<()> {
}
#[cfg(not(unix))]
fn time_sleep(seconds: f64, _vm: &VirtualMachine) {
std::thread::sleep(Duration::from_secs_f64(seconds));
fn time_sleep(dur: Duration, _vm: &VirtualMachine) {
std::thread::sleep(dur);
}
#[cfg(any(not(target_arch = "wasm32"), target_os = "wasi"))]