forked from Rust-related/RustPython
Merge pull request #1643 from RustPython/coolreader18/http
Add the http module
This commit is contained in:
713
Lib/calendar.py
vendored
Normal file
713
Lib/calendar.py
vendored
Normal file
@@ -0,0 +1,713 @@
|
||||
"""Calendar printing functions
|
||||
|
||||
Note when comparing these calendars to the ones printed by cal(1): By
|
||||
default, these calendars have Monday as the first day of the week, and
|
||||
Sunday as the last (the European convention). Use setfirstweekday() to
|
||||
set the first day of the week (0=Monday, 6=Sunday)."""
|
||||
|
||||
import sys
|
||||
import datetime
|
||||
import locale as _locale
|
||||
from itertools import repeat
|
||||
|
||||
__all__ = ["IllegalMonthError", "IllegalWeekdayError", "setfirstweekday",
|
||||
"firstweekday", "isleap", "leapdays", "weekday", "monthrange",
|
||||
"monthcalendar", "prmonth", "month", "prcal", "calendar",
|
||||
"timegm", "month_name", "month_abbr", "day_name", "day_abbr",
|
||||
"Calendar", "TextCalendar", "HTMLCalendar", "LocaleTextCalendar",
|
||||
"LocaleHTMLCalendar", "weekheader"]
|
||||
|
||||
# Exception raised for bad input (with string parameter for details)
|
||||
error = ValueError
|
||||
|
||||
# Exceptions raised for bad input
|
||||
class IllegalMonthError(ValueError):
    """Raised when a month number is not in the range 1-12.

    The rejected value is kept on the ``month`` attribute and shown
    (via ``%r``) in the string form of the exception.
    """

    def __init__(self, month):
        # Only the offending value is recorded; nothing is forwarded to
        # ValueError.__init__.
        self.month = month

    def __str__(self):
        template = "bad month number %r; must be 1-12"
        return template % self.month
|
||||
|
||||
|
||||
class IllegalWeekdayError(ValueError):
    """Raised when a weekday number is not in the range 0-6.

    The rejected value is kept on the ``weekday`` attribute and shown
    (via ``%r``) in the string form of the exception.
    """

    def __init__(self, weekday):
        # Only the offending value is recorded; nothing is forwarded to
        # ValueError.__init__.
        self.weekday = weekday

    def __str__(self):
        template = "bad weekday number %r; must be 0 (Monday) to 6 (Sunday)"
        return template % self.weekday
|
||||
|
||||
|
||||
# Constants for months referenced later
|
||||
January = 1
|
||||
February = 2
|
||||
|
||||
# Number of days per month (except for February in leap years)
|
||||
mdays = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
|
||||
|
||||
# This module used to have hard-coded lists of day and month names, as
|
||||
# English strings. The classes following emulate a read-only version of
|
||||
# that, but supply localized names. Note that the values are computed
|
||||
# fresh on each call, in case the user changes locale between calls.
|
||||
|
||||
class _localized_month:
    """Read-only, 1-based sequence of month names.

    Each lookup calls ``strftime`` with the format given to the
    constructor, so the names are computed fresh on every access.
    Index 0 always yields the empty string.
    """

    # Slot 0 is a placeholder returning ""; slots 1..12 are the bound
    # strftime methods of the first day of each month of 2001.
    _months = [lambda x: ""] + [
        datetime.date(2001, month_no, 1).strftime for month_no in range(1, 13)
    ]

    def __init__(self, format):
        self.format = format

    def __getitem__(self, i):
        picked = self._months[i]
        if not isinstance(i, slice):
            # Single index: ``picked`` is one formatter callable.
            return picked(self.format)
        # Slice: ``picked`` is a list of formatter callables.
        return [render(self.format) for render in picked]

    def __len__(self):
        return 13
|
||||
|
||||
|
||||
class _localized_day:
    """Read-only sequence of the seven weekday names, Monday first.

    Each lookup calls ``strftime`` with the format given to the
    constructor, so the names are computed fresh on every access.
    """

    # January 1, 2001, was a Monday, so days 1..7 of that month map to
    # weekday numbers 0..6.
    _days = [
        datetime.date(2001, 1, day_no).strftime for day_no in range(1, 8)
    ]

    def __init__(self, format):
        self.format = format

    def __getitem__(self, i):
        picked = self._days[i]
        if not isinstance(i, slice):
            # Single index: ``picked`` is one formatter callable.
            return picked(self.format)
        # Slice: ``picked`` is a list of formatter callables.
        return [render(self.format) for render in picked]

    def __len__(self):
        return 7
|
||||
|
||||
|
||||
# Full and abbreviated names of weekdays
|
||||
day_name = _localized_day('%A')
|
||||
day_abbr = _localized_day('%a')
|
||||
|
||||
# Full and abbreviated names of months (1-based arrays!!!)
|
||||
month_name = _localized_month('%B')
|
||||
month_abbr = _localized_month('%b')
|
||||
|
||||
# Constants for weekdays
|
||||
(MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY) = range(7)
|
||||
|
||||
|
||||
def isleap(year):
    """Return True for leap years, False for non-leap years."""
    if year % 4 != 0:
        return False
    # Divisible by 4: century years count only when divisible by 400.
    return year % 100 != 0 or year % 400 == 0
|
||||
|
||||
|
||||
def leapdays(y1, y2):
    """Return number of leap years in range [y1, y2).

    Assume y1 <= y2.
    """
    # Shift both bounds down by one so that the floor divisions below
    # count multiples inside the half-open range [y1, y2).
    low = y1 - 1
    high = y2 - 1
    by_four = high // 4 - low // 4
    by_hundred = high // 100 - low // 100
    by_four_hundred = high // 400 - low // 400
    return by_four - by_hundred + by_four_hundred
|
||||
|
||||
|
||||
def weekday(year, month, day):
    """Return weekday (0-6 ~ Mon-Sun) for year, month (1-12), day (1-31).

    Years outside the range supported by ``datetime.date``
    (``datetime.MINYEAR`` .. ``datetime.MAXYEAR``) are mapped onto an
    equivalent year in 2000-2399 instead of failing: the Gregorian
    calendar repeats exactly every 400 years, so the weekday is the same.

    Raises ValueError (from ``datetime.date``) for an invalid month or day.
    """
    if not datetime.MINYEAR <= year <= datetime.MAXYEAR:
        # 400 Gregorian years are a whole number of weeks, so shifting by
        # a multiple of 400 years preserves both weekday and leap status.
        year = 2000 + year % 400
    return datetime.date(year, month, day).weekday()
|
||||
|
||||
|
||||
def monthrange(year, month):
    """Return weekday (0-6 ~ Mon-Sun) of the first day and number of
    days (28-31) for year, month.

    Raises IllegalMonthError when month is not in 1-12.
    """
    if not (1 <= month <= 12):
        raise IllegalMonthError(month)
    first_weekday = weekday(year, month, 1)
    length = mdays[month]
    if month == February and isleap(year):
        # mdays holds the non-leap length of February.
        length += 1
    return first_weekday, length
|
||||
|
||||
|
||||
class Calendar(object):
    """
    Base calendar class. This class doesn't do any formatting. It simply
    provides data to subclasses.
    """

    def __init__(self, firstweekday=0):
        self.firstweekday = firstweekday  # 0 = Monday, 6 = Sunday

    def getfirstweekday(self):
        # The stored value is normalised into 0..6 on every read.
        return self._firstweekday % 7

    def setfirstweekday(self, firstweekday):
        self._firstweekday = firstweekday

    firstweekday = property(getfirstweekday, setfirstweekday)

    def iterweekdays(self):
        """
        Return an iterator for one week of weekday numbers starting with the
        configured first one.
        """
        start = self.firstweekday
        for offset in range(7):
            yield (start + offset) % 7

    def itermonthdates(self, year, month):
        """
        Return an iterator for one month. The iterator will yield
        datetime.date values and will always iterate through complete weeks,
        so it will yield dates outside the specified month.
        """
        current = datetime.date(year, month, 1)
        # Step back to the first day of the week containing the 1st.
        lead = (current.weekday() - self.firstweekday) % 7
        current -= datetime.timedelta(days=lead)
        step = datetime.timedelta(days=1)
        while True:
            yield current
            try:
                current += step
            except OverflowError:
                # Adding one day could fail after datetime.MAXYEAR
                return
            # Stop at the first week boundary that lies outside the month.
            if current.month != month and current.weekday() == self.firstweekday:
                return

    def itermonthdays2(self, year, month):
        """
        Like itermonthdates(), but will yield (day number, weekday number)
        tuples. For days outside the specified month the day number is 0.
        """
        position = self.firstweekday
        for day in self.itermonthdays(year, month):
            yield day, position % 7
            position += 1

    def itermonthdays(self, year, month):
        """
        Like itermonthdates(), but will yield day numbers. For days outside
        the specified month the day number is 0.
        """
        first, total = monthrange(year, month)
        leading = (first - self.firstweekday) % 7
        for _ in range(leading):
            yield 0
        for day in range(1, total + 1):
            yield day
        # Pad the final week out to a full seven entries.
        trailing = (self.firstweekday - first - total) % 7
        for _ in range(trailing):
            yield 0

    def monthdatescalendar(self, year, month):
        """
        Return a matrix (list of lists) representing a month's calendar.
        Each row represents a week; week entries are datetime.date values.
        """
        flat = list(self.itermonthdates(year, month))
        weeks = []
        for start in range(0, len(flat), 7):
            weeks.append(flat[start:start + 7])
        return weeks

    def monthdays2calendar(self, year, month):
        """
        Return a matrix representing a month's calendar.
        Each row represents a week; week entries are
        (day number, weekday number) tuples. Day numbers outside this month
        are zero.
        """
        flat = list(self.itermonthdays2(year, month))
        weeks = []
        for start in range(0, len(flat), 7):
            weeks.append(flat[start:start + 7])
        return weeks

    def monthdayscalendar(self, year, month):
        """
        Return a matrix representing a month's calendar.
        Each row represents a week; days outside this month are zero.
        """
        flat = list(self.itermonthdays(year, month))
        weeks = []
        for start in range(0, len(flat), 7):
            weeks.append(flat[start:start + 7])
        return weeks

    def yeardatescalendar(self, year, width=3):
        """
        Return the data for the specified year ready for formatting. The
        return value is a list of month rows. Each month row contains up to
        width months. Each month contains between 4 and 6 weeks and each
        week contains 1-7 days. Days are datetime.date objects.
        """
        per_month = []
        for month in range(January, January + 12):
            per_month.append(self.monthdatescalendar(year, month))
        return [per_month[k:k + width]
                for k in range(0, len(per_month), width)]

    def yeardays2calendar(self, year, width=3):
        """
        Return the data for the specified year ready for formatting (similar
        to yeardatescalendar()). Entries in the week lists are
        (day number, weekday number) tuples. Day numbers outside this month
        are zero.
        """
        per_month = []
        for month in range(January, January + 12):
            per_month.append(self.monthdays2calendar(year, month))
        return [per_month[k:k + width]
                for k in range(0, len(per_month), width)]

    def yeardayscalendar(self, year, width=3):
        """
        Return the data for the specified year ready for formatting (similar
        to yeardatescalendar()). Entries in the week lists are day numbers.
        Day numbers outside this month are zero.
        """
        per_month = []
        for month in range(January, January + 12):
            per_month.append(self.monthdayscalendar(year, month))
        return [per_month[k:k + width]
                for k in range(0, len(per_month), width)]
|
||||
|
||||
|
||||
class TextCalendar(Calendar):
    """
    Subclass of Calendar that outputs a calendar as a simple plain text
    similar to the UNIX program cal.
    """

    def prweek(self, theweek, width):
        """
        Print a single week (no newline).
        """
        rendered = self.formatweek(theweek, width)
        print(rendered, end=' ')

    def formatday(self, day, weekday, width):
        """
        Returns a formatted day.
        """
        # Day 0 marks a slot outside the month and renders as blanks;
        # real days are right-aligned in two columns before centring.
        text = '' if day == 0 else '%2i' % day
        return text.center(width)

    def formatweek(self, theweek, width):
        """
        Returns a single week in a string (no newline).
        """
        cells = [self.formatday(d, wd, width) for (d, wd) in theweek]
        return ' '.join(cells)

    def formatweekday(self, day, width):
        """
        Returns a formatted week day name.
        """
        # Full names are used only when the column is at least 9 wide.
        names = day_name if width >= 9 else day_abbr
        return names[day][:width].center(width)

    def formatweekheader(self, width):
        """
        Return a header for a week.
        """
        labels = [self.formatweekday(i, width) for i in self.iterweekdays()]
        return ' '.join(labels)

    def formatmonthname(self, theyear, themonth, width, withyear=True):
        """
        Return a formatted month name.
        """
        title = month_name[themonth]
        if withyear:
            title = "%s %r" % (title, theyear)
        return title.center(width)

    def prmonth(self, theyear, themonth, w=0, l=0):
        """
        Print a month's calendar.
        """
        print(self.formatmonth(theyear, themonth, w, l), end='')

    def formatmonth(self, theyear, themonth, w=0, l=0):
        """
        Return a month's calendar string (multi-line).
        """
        w = max(2, w)
        l = max(1, l)
        line_end = '\n' * l
        rows = [
            self.formatmonthname(theyear, themonth, 7 * (w + 1) - 1).rstrip(),
            self.formatweekheader(w).rstrip(),
        ]
        for week in self.monthdays2calendar(theyear, themonth):
            rows.append(self.formatweek(week, w).rstrip())
        # Every row, including the last, is followed by l newlines.
        return ''.join(row + line_end for row in rows)

    def formatyear(self, theyear, w=2, l=1, c=6, m=3):
        """
        Returns a year's calendar as a multi-line string.
        """
        w = max(2, w)
        l = max(1, l)
        c = max(2, c)
        colwidth = (w + 1) * 7 - 1
        line_end = '\n' * l
        pieces = [
            repr(theyear).center(colwidth * m + c * (m - 1)).rstrip(),
            line_end,
        ]
        emit = pieces.append
        header = self.formatweekheader(w)
        for (i, row) in enumerate(self.yeardays2calendar(theyear, m)):
            # months in this row
            months = range(m * i + 1, min(m * (i + 1) + 1, 13))
            emit(line_end)
            names = (self.formatmonthname(theyear, k, colwidth, False)
                     for k in months)
            emit(formatstring(names, colwidth, c).rstrip())
            emit(line_end)
            headers = (header for k in months)
            emit(formatstring(headers, colwidth, c).rstrip())
            emit(line_end)
            # max number of weeks for this row
            height = max(len(cal) for cal in row)
            for j in range(height):
                # Months with fewer weeks contribute an empty column.
                weeks = [
                    self.formatweek(cal[j], w) if j < len(cal) else ''
                    for cal in row
                ]
                emit(formatstring(weeks, colwidth, c).rstrip())
                emit(line_end)
        return ''.join(pieces)

    def pryear(self, theyear, w=0, l=0, c=6, m=3):
        """Print a year's calendar."""
        print(self.formatyear(theyear, w, l, c, m))
|
||||
|
||||
|
||||
class HTMLCalendar(Calendar):
    """
    This calendar returns complete HTML pages.
    """

    # CSS classes for the day <td>s
    cssclasses = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"]

    def formatday(self, day, weekday):
        """
        Return a day as a table cell.

        ``day`` 0 denotes a slot outside the month and produces a
        "noday" cell; otherwise the cell carries the CSS class for
        ``weekday`` (0-6) and the day number.
        """
        if day == 0:
            # Use the ASCII-only entity rather than a literal blank so the
            # cell keeps its content regardless of the output encoding.
            return '<td class="noday">&nbsp;</td>'  # day outside month
        return '<td class="%s">%d</td>' % (self.cssclasses[weekday], day)

    def formatweek(self, theweek):
        """
        Return a complete week as a table row.

        ``theweek`` is an iterable of (day number, weekday number) tuples.
        """
        cells = ''.join(self.formatday(d, wd) for (d, wd) in theweek)
        return '<tr>%s</tr>' % cells

    def formatweekday(self, day):
        """
        Return a weekday name as a table header.
        """
        return '<th class="%s">%s</th>' % (self.cssclasses[day], day_abbr[day])

    def formatweekheader(self):
        """
        Return a header for a week as a table row.
        """
        cells = ''.join(self.formatweekday(i) for i in self.iterweekdays())
        return '<tr>%s</tr>' % cells

    def formatmonthname(self, theyear, themonth, withyear=True):
        """
        Return a month name as a table row.

        The year is appended to the month name when ``withyear`` is true.
        """
        if withyear:
            s = '%s %s' % (month_name[themonth], theyear)
        else:
            s = '%s' % month_name[themonth]
        return '<tr><th colspan="7" class="month">%s</th></tr>' % s

    def formatmonth(self, theyear, themonth, withyear=True):
        """
        Return a formatted month as a table.
        """
        v = []
        a = v.append
        a('<table border="0" cellpadding="0" cellspacing="0" class="month">')
        a('\n')
        a(self.formatmonthname(theyear, themonth, withyear=withyear))
        a('\n')
        a(self.formatweekheader())
        a('\n')
        for week in self.monthdays2calendar(theyear, themonth):
            a(self.formatweek(week))
            a('\n')
        a('</table>')
        a('\n')
        return ''.join(v)

    def formatyear(self, theyear, width=3):
        """
        Return a formatted year as a table of tables.

        ``width`` is the number of months per row; values below 1 are
        treated as 1.
        """
        v = []
        a = v.append
        width = max(width, 1)
        a('<table border="0" cellpadding="0" cellspacing="0" class="year">')
        a('\n')
        a('<tr><th colspan="%d" class="year">%s</th></tr>' % (width, theyear))
        for i in range(January, January+12, width):
            # months in this row
            months = range(i, min(i+width, 13))
            a('<tr>')
            for m in months:
                a('<td>')
                a(self.formatmonth(theyear, m, withyear=False))
                a('</td>')
            a('</tr>')
        a('</table>')
        return ''.join(v)

    def formatyearpage(self, theyear, width=3, css='calendar.css', encoding=None):
        """
        Return a formatted year as a complete HTML page.

        The page is returned as bytes in ``encoding`` (defaulting to
        ``sys.getdefaultencoding()``); characters the encoding cannot
        represent are emitted as XML character references. No stylesheet
        link is written when ``css`` is None.
        """
        if encoding is None:
            encoding = sys.getdefaultencoding()
        v = []
        a = v.append
        a('<?xml version="1.0" encoding="%s"?>\n' % encoding)
        a('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n')
        a('<html>\n')
        a('<head>\n')
        a('<meta http-equiv="Content-Type" content="text/html; charset=%s" />\n' % encoding)
        if css is not None:
            a('<link rel="stylesheet" type="text/css" href="%s" />\n' % css)
        a('<title>Calendar for %d</title>\n' % theyear)
        a('</head>\n')
        a('<body>\n')
        a(self.formatyear(theyear, width))
        a('</body>\n')
        a('</html>\n')
        return ''.join(v).encode(encoding, "xmlcharrefreplace")
|
||||
|
||||
|
||||
class different_locale:
    """Context manager that switches the LC_TIME locale for a block.

    On entry the current LC_TIME setting is remembered and replaced by
    the locale given to the constructor; on exit the remembered setting
    is put back.
    """

    def __init__(self, locale):
        self.locale = locale

    def __enter__(self):
        category = _locale.LC_TIME
        self.oldlocale = _locale.getlocale(category)
        _locale.setlocale(category, self.locale)

    def __exit__(self, *args):
        # Restore whatever was active before __enter__ ran.
        _locale.setlocale(_locale.LC_TIME, self.oldlocale)
|
||||
|
||||
|
||||
class LocaleTextCalendar(TextCalendar):
    """
    This class can be passed a locale name in the constructor and will return
    month and weekday names in the specified locale. If this locale includes
    an encoding all strings containing month and weekday names will be returned
    as unicode.
    """

    def __init__(self, firstweekday=0, locale=None):
        TextCalendar.__init__(self, firstweekday)
        # Fall back to the process default when no locale is supplied.
        self.locale = _locale.getdefaultlocale() if locale is None else locale

    def formatweekday(self, day, width):
        # The name lookup must happen while the locale is switched.
        with different_locale(self.locale):
            source = day_name if width >= 9 else day_abbr
            label = source[day]
            return label[:width].center(width)

    def formatmonthname(self, theyear, themonth, width, withyear=True):
        # The name lookup must happen while the locale is switched.
        with different_locale(self.locale):
            title = month_name[themonth]
            if withyear:
                title = "%s %r" % (title, theyear)
            return title.center(width)
|
||||
|
||||
|
||||
class LocaleHTMLCalendar(HTMLCalendar):
    """
    This class can be passed a locale name in the constructor and will return
    month and weekday names in the specified locale. If this locale includes
    an encoding all strings containing month and weekday names will be returned
    as unicode.
    """

    def __init__(self, firstweekday=0, locale=None):
        HTMLCalendar.__init__(self, firstweekday)
        # Fall back to the process default when no locale is supplied.
        self.locale = _locale.getdefaultlocale() if locale is None else locale

    def formatweekday(self, day):
        # The name lookup must happen while the locale is switched.
        with different_locale(self.locale):
            label = day_abbr[day]
            return '<th class="%s">%s</th>' % (self.cssclasses[day], label)

    def formatmonthname(self, theyear, themonth, withyear=True):
        # The name lookup must happen while the locale is switched.
        with different_locale(self.locale):
            title = month_name[themonth]
            if withyear:
                title = '%s %s' % (title, theyear)
            return '<tr><th colspan="7" class="month">%s</th></tr>' % title
|
||||
|
||||
|
||||
# Support for old module level interface
|
||||
c = TextCalendar()
|
||||
|
||||
firstweekday = c.getfirstweekday
|
||||
|
||||
def setfirstweekday(firstweekday):
    """Set the first weekday of the module-level calendar ``c``.

    Raises IllegalWeekdayError unless MONDAY <= firstweekday <= SUNDAY.
    """
    if MONDAY <= firstweekday <= SUNDAY:
        c.firstweekday = firstweekday
        return
    raise IllegalWeekdayError(firstweekday)
|
||||
|
||||
monthcalendar = c.monthdayscalendar
|
||||
prweek = c.prweek
|
||||
week = c.formatweek
|
||||
weekheader = c.formatweekheader
|
||||
prmonth = c.prmonth
|
||||
month = c.formatmonth
|
||||
calendar = c.formatyear
|
||||
prcal = c.pryear
|
||||
|
||||
|
||||
# Spacing of month columns for multi-column year calendar
|
||||
_colwidth = 7*3 - 1 # Amount printed by prweek()
|
||||
_spacing = 6 # Number of spaces between columns
|
||||
|
||||
|
||||
def format(cols, colwidth=_colwidth, spacing=_spacing):
    """Prints multi-column formatting for year calendars"""
    rendered = formatstring(cols, colwidth, spacing)
    print(rendered)
|
||||
|
||||
|
||||
def formatstring(cols, colwidth=_colwidth, spacing=_spacing):
    """Returns a string formatted from n strings, centered within n columns."""
    # ``spacing`` is a count; turn it into that many blanks.
    gap = spacing * ' '
    centered = (col.center(colwidth) for col in cols)
    return gap.join(centered)
|
||||
|
||||
|
||||
EPOCH = 1970
|
||||
_EPOCH_ORD = datetime.date(EPOCH, 1, 1).toordinal()
|
||||
|
||||
|
||||
def timegm(tuple):
    """Unrelated but handy function to calculate Unix timestamp from GMT."""
    # Only the first six fields (year .. second) are used.
    year, month, day, hour, minute, second = tuple[:6]
    # Whole days between the epoch and the given date.
    first_of_month = datetime.date(year, month, 1).toordinal()
    days = first_of_month - _EPOCH_ORD + day - 1
    return ((days * 24 + hour) * 60 + minute) * 60 + second
|
||||
|
||||
|
||||
def main(args):
    """Command-line entry point: print a text or HTML calendar.

    ``args`` is an argv-style list; its first element is skipped.
    With no year the current year is shown, with a year that whole year,
    and with year and month (text output only) a single month.
    """
    import argparse

    parser = argparse.ArgumentParser()
    textgroup = parser.add_argument_group('text only arguments')
    htmlgroup = parser.add_argument_group('html only arguments')

    textgroup.add_argument("-w", "--width", type=int, default=2,
                           help="width of date column (default 2)")
    textgroup.add_argument("-l", "--lines", type=int, default=1,
                           help="number of lines for each week (default 1)")
    textgroup.add_argument("-s", "--spacing", type=int, default=6,
                           help="spacing between months (default 6)")
    textgroup.add_argument("-m", "--months", type=int, default=3,
                           help="months per row (default 3)")
    htmlgroup.add_argument("-c", "--css", default="calendar.css",
                           help="CSS to use for page")
    parser.add_argument("-L", "--locale", default=None,
                        help="locale to be used from month and weekday names")
    parser.add_argument("-e", "--encoding", default=None,
                        help="encoding to use for output")
    parser.add_argument("-t", "--type", default="text",
                        choices=("text", "html"),
                        help="output type (text or html)")
    parser.add_argument("year", nargs='?', type=int,
                        help="year number (1-9999)")
    parser.add_argument("month", nargs='?', type=int,
                        help="month number (1-12, text only)")

    options = parser.parse_args(args[1:])

    if options.locale and not options.encoding:
        parser.error("if --locale is specified --encoding is required")
        sys.exit(1)

    # (name, encoding) pair handed to the Locale* calendar classes.
    locale = options.locale, options.encoding

    if options.type == "html":
        cal = LocaleHTMLCalendar(locale=locale) if options.locale else HTMLCalendar()
        encoding = options.encoding
        if encoding is None:
            encoding = sys.getdefaultencoding()
        optdict = dict(encoding=encoding, css=options.css)
        # formatyearpage() returns bytes, so write to the binary stream.
        write = sys.stdout.buffer.write
        if options.year is None:
            write(cal.formatyearpage(datetime.date.today().year, **optdict))
        elif options.month is None:
            write(cal.formatyearpage(options.year, **optdict))
        else:
            parser.error("incorrect number of arguments")
            sys.exit(1)
        return

    cal = LocaleTextCalendar(locale=locale) if options.locale else TextCalendar()
    optdict = dict(w=options.width, l=options.lines)
    if options.month is None:
        # Column spacing and months-per-row apply to year output only.
        optdict["c"] = options.spacing
        optdict["m"] = options.months
    if options.year is None:
        result = cal.formatyear(datetime.date.today().year, **optdict)
    elif options.month is None:
        result = cal.formatyear(options.year, **optdict)
    else:
        result = cal.formatmonth(options.year, options.month, **optdict)
    write = sys.stdout.write
    if options.encoding:
        # An explicit encoding switches to bytes on the binary stream.
        result = result.encode(options.encoding)
        write = sys.stdout.buffer.write
    write(result)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv)
|
||||
314
Lib/code.py
vendored
Normal file
314
Lib/code.py
vendored
Normal file
@@ -0,0 +1,314 @@
|
||||
"""Utilities needed to emulate Python's interactive interpreter.
|
||||
|
||||
"""
|
||||
|
||||
# Inspired by similar code by Jeff Epler and Fredrik Lundh.
|
||||
|
||||
|
||||
import sys
|
||||
import traceback
|
||||
import argparse
|
||||
from codeop import CommandCompiler, compile_command
|
||||
|
||||
__all__ = ["InteractiveInterpreter", "InteractiveConsole", "interact",
|
||||
"compile_command"]
|
||||
|
||||
class InteractiveInterpreter:
    """Base class for InteractiveConsole.

    This class deals with parsing and interpreter state (the user's
    namespace); it doesn't deal with input buffering or prompting or
    input file naming (the filename is always passed in explicitly).

    """

    def __init__(self, locals=None):
        """Constructor.

        The optional 'locals' argument specifies the dictionary in
        which code will be executed; it defaults to a newly created
        dictionary with key "__name__" set to "__console__" and key
        "__doc__" set to None.

        """
        if locals is None:
            locals = {"__name__": "__console__", "__doc__": None}
        self.locals = locals
        self.compile = CommandCompiler()

    def runsource(self, source, filename="<input>", symbol="single"):
        """Compile and run some source in the interpreter.

        Arguments are as for compile_command().

        One of several things can happen:

        1) The input is incorrect; compiling raised an exception
        (SyntaxError, OverflowError or ValueError). The error is
        reported by calling the showsyntaxerror() method.

        2) The input is incomplete, and more input is required;
        the compiler returned None. Nothing happens.

        3) The input is complete; the compiler returned a code
        object. The code is executed by calling self.runcode() (which
        also handles run-time exceptions, except for SystemExit).

        The return value is True in case 2, False in the other cases (unless
        an exception is raised). The return value can be used to
        decide whether to use sys.ps1 or sys.ps2 to prompt the next
        line.

        """
        try:
            compiled = self.compile(source, filename, symbol)
        except (OverflowError, SyntaxError, ValueError):
            # Case 1: malformed input
            self.showsyntaxerror(filename)
            return False

        if compiled is None:
            # Case 2: incomplete input
            return True

        # Case 3: complete input
        self.runcode(compiled)
        return False

    def runcode(self, code):
        """Execute a code object.

        When an exception occurs, self.showtraceback() is called to
        display a traceback. All exceptions are caught except
        SystemExit, which is reraised.

        A note about KeyboardInterrupt: this exception may occur
        elsewhere in this code, and may not always be caught. The
        caller should be prepared to deal with it.

        """
        try:
            exec(code, self.locals)
        except SystemExit:
            raise
        except BaseException:
            # Deliberately catches everything else, KeyboardInterrupt
            # included, so the session survives errors in user code.
            self.showtraceback()

    def showsyntaxerror(self, filename=None):
        """Display the syntax error that just occurred.

        This doesn't display a stack trace because there isn't one.

        If a filename is given, it is stuffed in the exception instead
        of what was there before (because Python's parser always uses
        "<string>" when reading from a string).

        The output is written by self.write(), below.

        """
        exc_type, exc_value, exc_tb = sys.exc_info()
        sys.last_type = exc_type
        sys.last_value = exc_value
        sys.last_traceback = exc_tb
        if filename and exc_type is SyntaxError:
            # Work hard to stuff the correct filename in the exception
            try:
                msg, (dummy_filename, lineno, offset, line) = exc_value.args
            except ValueError:
                # Not the format we expect; leave it alone
                pass
            else:
                # Stuff in the right filename
                exc_value = SyntaxError(msg, (filename, lineno, offset, line))
                sys.last_value = exc_value
        if sys.excepthook is not sys.__excepthook__:
            # If someone has set sys.excepthook, we let that take precedence
            # over self.write
            sys.excepthook(exc_type, exc_value, exc_tb)
        else:
            report = traceback.format_exception_only(exc_type, exc_value)
            self.write(''.join(report))

    def showtraceback(self):
        """Display the exception that just occurred.

        We remove the first stack item because it is our own code.

        The output is written by self.write(), below.

        """
        exc_type, exc_value, exc_tb = sys.exc_info()
        sys.last_type = exc_type
        sys.last_value = exc_value
        sys.last_traceback = exc_tb
        try:
            # tb_next skips the frame of runcode() itself.
            report = traceback.format_exception(exc_type, exc_value,
                                                exc_tb.tb_next)
            if sys.excepthook is not sys.__excepthook__:
                # If someone has set sys.excepthook, we let that take
                # precedence over self.write
                sys.excepthook(exc_type, exc_value, exc_tb)
            else:
                self.write(''.join(report))
        finally:
            # Drop the local traceback references once reporting is done.
            exc_tb = exc_value = exc_type = None

    def write(self, data):
        """Write a string.

        The base implementation writes to sys.stderr; a subclass may
        replace this with a different implementation.

        """
        sys.stderr.write(data)
|
||||
|
||||
|
||||
class InteractiveConsole(InteractiveInterpreter):
|
||||
"""Closely emulate the behavior of the interactive Python interpreter.
|
||||
|
||||
This class builds on InteractiveInterpreter and adds prompting
|
||||
using the familiar sys.ps1 and sys.ps2, and input buffering.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, locals=None, filename="<console>"):
|
||||
"""Constructor.
|
||||
|
||||
The optional locals argument will be passed to the
|
||||
InteractiveInterpreter base class.
|
||||
|
||||
The optional filename argument should specify the (file)name
|
||||
of the input stream; it will show up in tracebacks.
|
||||
|
||||
"""
|
||||
InteractiveInterpreter.__init__(self, locals)
|
||||
self.filename = filename
|
||||
self.resetbuffer()
|
||||
|
||||
def resetbuffer(self):
|
||||
"""Reset the input buffer."""
|
||||
self.buffer = []
|
||||
|
||||
def interact(self, banner=None, exitmsg=None):
|
||||
"""Closely emulate the interactive Python console.
|
||||
|
||||
The optional banner argument specifies the banner to print
|
||||
before the first interaction; by default it prints a banner
|
||||
similar to the one printed by the real Python interpreter,
|
||||
followed by the current class name in parentheses (so as not
|
||||
to confuse this with the real interpreter -- since it's so
|
||||
close!).
|
||||
|
||||
The optional exitmsg argument specifies the exit message
|
||||
printed when exiting. Pass the empty string to suppress
|
||||
printing an exit message. If exitmsg is not given or None,
|
||||
a default message is printed.
|
||||
|
||||
"""
|
||||
try:
|
||||
sys.ps1
|
||||
except AttributeError:
|
||||
sys.ps1 = ">>> "
|
||||
try:
|
||||
sys.ps2
|
||||
except AttributeError:
|
||||
sys.ps2 = "... "
|
||||
cprt = 'Type "help", "copyright", "credits" or "license" for more information.'
|
||||
if banner is None:
|
||||
self.write("Python %s on %s\n%s\n(%s)\n" %
|
||||
(sys.version, sys.platform, cprt,
|
||||
self.__class__.__name__))
|
||||
elif banner:
|
||||
self.write("%s\n" % str(banner))
|
||||
more = 0
|
||||
while 1:
|
||||
try:
|
||||
if more:
|
||||
prompt = sys.ps2
|
||||
else:
|
||||
prompt = sys.ps1
|
||||
try:
|
||||
line = self.raw_input(prompt)
|
||||
except EOFError:
|
||||
self.write("\n")
|
||||
break
|
||||
else:
|
||||
more = self.push(line)
|
||||
except KeyboardInterrupt:
|
||||
self.write("\nKeyboardInterrupt\n")
|
||||
self.resetbuffer()
|
||||
more = 0
|
||||
if exitmsg is None:
|
||||
self.write('now exiting %s...\n' % self.__class__.__name__)
|
||||
elif exitmsg != '':
|
||||
self.write('%s\n' % exitmsg)
|
||||
|
||||
def push(self, line):
    """Push a line to the interpreter.

    The line should not have a trailing newline; it may have
    internal newlines.  The line is appended to a buffer and the
    interpreter's runsource() method is called with the
    concatenated contents of the buffer as source.  If this
    indicates that the command was executed or invalid, the buffer
    is reset; otherwise, the command is incomplete, and the buffer
    is left as it was after the line was appended.  The return
    value is 1 if more input is required, 0 if the line was dealt
    with in some way (this is the same as runsource()).

    """
    self.buffer.append(line)
    source = "\n".join(self.buffer)
    more = self.runsource(source, self.filename)
    if not more:
        # Command finished (executed or rejected): start a new one.
        self.resetbuffer()
    return more
|
||||
|
||||
def raw_input(self, prompt=""):
    """Write a prompt and read a line.

    The returned line does not include the trailing newline.
    When the user enters the EOF key sequence, EOFError is raised.

    The base implementation uses the built-in function
    input(); a subclass may replace this with a different
    implementation.

    """
    return input(prompt)
|
||||
|
||||
|
||||
|
||||
def interact(banner=None, readfunc=None, local=None, exitmsg=None):
    """Closely emulate the interactive Python interpreter.

    This is a backwards compatible interface to the InteractiveConsole
    class.  When readfunc is not specified, it attempts to import the
    readline module to enable GNU readline if it is available.

    Arguments (all optional, all default to None):

    banner -- passed to InteractiveConsole.interact()
    readfunc -- if not None, replaces InteractiveConsole.raw_input()
    local -- passed to InteractiveInterpreter.__init__()
    exitmsg -- passed to InteractiveConsole.interact()

    """
    console = InteractiveConsole(local)
    if readfunc is not None:
        console.raw_input = readfunc
    else:
        # Imported only for its side effect on input(); absence is fine.
        try:
            import readline
        except ImportError:
            pass
    console.interact(banner, exitmsg)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: "-q" (or the interpreter's own quiet flag)
    # suppresses the banner by passing an empty string to interact().
    parser = argparse.ArgumentParser()
    parser.add_argument('-q', action='store_true',
                       help="don't print version and copyright messages")
    args = parser.parse_args()
    if args.q or sys.flags.quiet:
        banner = ''
    else:
        banner = None
    interact(banner)
|
||||
186
Lib/getpass.py
vendored
Normal file
186
Lib/getpass.py
vendored
Normal file
@@ -0,0 +1,186 @@
|
||||
"""Utilities to get a password and/or the current user name.
|
||||
|
||||
getpass(prompt[, stream]) - Prompt for a password, with echo turned off.
|
||||
getuser() - Get the user name from the environment or password database.
|
||||
|
||||
GetPassWarning - This UserWarning is issued when getpass() cannot prevent
|
||||
echoing of the password contents while reading.
|
||||
|
||||
On Windows, the msvcrt module will be used.
|
||||
On the Mac EasyDialogs.AskPassword is used, if available.
|
||||
|
||||
"""
|
||||
|
||||
# Authors: Piers Lauder (original)
|
||||
# Guido van Rossum (Windows support and cleanup)
|
||||
# Gregory P. Smith (tty support & GetPassWarning)
|
||||
|
||||
import contextlib
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
__all__ = ["getpass","getuser","GetPassWarning"]
|
||||
|
||||
|
||||
class GetPassWarning(UserWarning):
    """Issued when getpass() cannot prevent echoing of the password."""
|
||||
|
||||
|
||||
def unix_getpass(prompt='Password: ', stream=None):
    """Prompt for a password, with echo turned off.

    Args:
      prompt: Written on stream to ask for the input.  Default: 'Password: '
      stream: A writable file object to display the prompt.  Defaults to
              the tty.  If no tty is available defaults to sys.stderr.
    Returns:
      The seKr3t input.
    Raises:
      EOFError: If our input tty or stdin was closed.
      GetPassWarning: When we were unable to turn echo off on the input.

    Always restores terminal settings before returning.
    """
    passwd = None
    with contextlib.ExitStack() as stack:
        try:
            # Always try reading and writing directly on the tty first.
            fd = os.open('/dev/tty', os.O_RDWR|os.O_NOCTTY)
            tty = io.FileIO(fd, 'w+')
            stack.enter_context(tty)
            input = io.TextIOWrapper(tty)
            stack.enter_context(input)
            if not stream:
                stream = input
        except OSError:
            # If that fails, see if stdin can be controlled.
            stack.close()
            try:
                fd = sys.stdin.fileno()
            except (AttributeError, ValueError):
                fd = None
                passwd = fallback_getpass(prompt, stream)
            input = sys.stdin
            if not stream:
                stream = sys.stderr

        if fd is not None:
            try:
                old = termios.tcgetattr(fd)     # a copy to save
                new = old[:]
                new[3] &= ~termios.ECHO  # 3 == 'lflags'
                tcsetattr_flags = termios.TCSAFLUSH
                if hasattr(termios, 'TCSASOFT'):
                    tcsetattr_flags |= termios.TCSASOFT
                try:
                    termios.tcsetattr(fd, tcsetattr_flags, new)
                    passwd = _raw_input(prompt, stream, input=input)
                finally:
                    # Restore the saved attributes even if reading failed.
                    termios.tcsetattr(fd, tcsetattr_flags, old)
                    stream.flush()  # issue7208
            except termios.error:
                if passwd is not None:
                    # _raw_input succeeded.  The final tcsetattr failed.  Reraise
                    # instead of leaving the terminal in an unknown state.
                    raise
                # We can't control the tty or stdin.  Give up and use normal IO.
                # fallback_getpass() raises an appropriate warning.
                if stream is not input:
                    # clean up unused file objects before blocking
                    stack.close()
                passwd = fallback_getpass(prompt, stream)

        stream.write('\n')
        return passwd
|
||||
|
||||
|
||||
def win_getpass(prompt='Password: ', stream=None):
    """Prompt for password with echo off, using Windows getch().

    Falls back to fallback_getpass() when sys.stdin has been replaced.
    Backspace removes the last character typed; Ctrl-C ('\\003') raises
    KeyboardInterrupt; carriage return or newline ends the input.
    """
    if sys.stdin is not sys.__stdin__:
        return fallback_getpass(prompt, stream)

    for c in prompt:
        msvcrt.putwch(c)
    pw = ""
    while True:
        c = msvcrt.getwch()
        if c == '\r' or c == '\n':
            break
        if c == '\003':
            raise KeyboardInterrupt
        if c == '\b':
            pw = pw[:-1]
        else:
            pw = pw + c
    msvcrt.putwch('\r')
    msvcrt.putwch('\n')
    return pw
|
||||
|
||||
|
||||
def fallback_getpass(prompt='Password: ', stream=None):
    """Read a password with ordinary I/O when echo cannot be disabled.

    Issues a GetPassWarning, prints a notice on stream (sys.stderr when
    stream is falsy) and then reads the line via _raw_input().
    """
    warnings.warn("Can not control echo on the terminal.", GetPassWarning,
                  stacklevel=2)
    if not stream:
        stream = sys.stderr
    print("Warning: Password input may be echoed.", file=stream)
    return _raw_input(prompt, stream)
|
||||
|
||||
|
||||
def _raw_input(prompt="", stream=None, input=None):
    """Write prompt to stream and return one line read from input.

    stream defaults to sys.stderr and input to sys.stdin when falsy.
    A single trailing newline is stripped from the returned line.

    Raises:
      EOFError: If input.readline() returns an empty string.
    """
    # This doesn't save the string in the GNU readline history.
    if not stream:
        stream = sys.stderr
    if not input:
        input = sys.stdin
    prompt = str(prompt)
    if prompt:
        try:
            stream.write(prompt)
        except UnicodeEncodeError:
            # Use replace error handler to get as much as possible printed.
            prompt = prompt.encode(stream.encoding, 'replace')
            prompt = prompt.decode(stream.encoding)
            stream.write(prompt)
        stream.flush()
    # NOTE: The Python C API calls flockfile() (and unlock) during readline.
    line = input.readline()
    if not line:
        raise EOFError
    if line[-1] == '\n':
        line = line[:-1]
    return line
|
||||
|
||||
|
||||
def getuser():
    """Get the username from the environment or password database.

    First try various environment variables, then the password
    database.  This works on Windows as long as USERNAME is set.

    """

    # The first non-empty variable wins.
    for name in ('LOGNAME', 'USER', 'LNAME', 'USERNAME'):
        user = os.environ.get(name)
        if user:
            return user

    # If this fails, the exception will "explain" why
    import pwd
    return pwd.getpwuid(os.getuid())[0]
|
||||
|
||||
# Bind the name getpass to the appropriate function
|
||||
try:
    import termios
    # it's possible there is an incompatible termios from the
    # McMillan Installer, make sure we have a UNIX-compatible termios
    termios.tcgetattr, termios.tcsetattr
except (ImportError, AttributeError):
    # No usable termios: prefer the Windows console implementation,
    # otherwise fall back to plain (echoing) input.
    try:
        import msvcrt
    except ImportError:
        getpass = fallback_getpass
    else:
        getpass = win_getpass
else:
    getpass = unix_getpass
|
||||
145
Lib/hmac.py
vendored
Normal file
145
Lib/hmac.py
vendored
Normal file
@@ -0,0 +1,145 @@
|
||||
"""HMAC (Keyed-Hashing for Message Authentication) Python module.
|
||||
|
||||
Implements the HMAC algorithm as described by RFC 2104.
|
||||
"""
|
||||
|
||||
import warnings as _warnings
|
||||
# XXX RustPython TODO: _operator
|
||||
#from _operator import _compare_digest as compare_digest
|
||||
import hashlib as _hashlib
|
||||
|
||||
# Byte translation tables: XOR every byte value with the outer (0x5C)
# and inner (0x36) pad constants.
trans_5C = bytes((x ^ 0x5C) for x in range(256))
trans_36 = bytes((x ^ 0x36) for x in range(256))

# The size of the digests returned by HMAC depends on the underlying
# hashing module used.  Use digest_size from the instance of HMAC instead.
digest_size = None
|
||||
|
||||
|
||||
|
||||
class HMAC:
    """RFC 2104 HMAC class.  Also complies with RFC 4231.

    This supports the API for Cryptographic Hash Functions (PEP 247).
    """
    blocksize = 64  # 512-bit HMAC; can be changed in subclasses.

    def __init__(self, key, msg = None, digestmod = None):
        """Create a new HMAC object.

        key:       key for the keyed hash object.
        msg:       Initial input for the hash, if provided.
        digestmod: A module supporting PEP 247.  *OR*
                   A hashlib constructor returning a new hash object. *OR*
                   A hash name suitable for hashlib.new().
                   Defaults to hashlib.md5.
                   Implicit default to hashlib.md5 is deprecated and will be
                   removed in Python 3.6.

        Note: key and msg must be a bytes or bytearray objects.
        """

        if not isinstance(key, (bytes, bytearray)):
            raise TypeError("key: expected bytes or bytearray, but got %r" % type(key).__name__)

        if digestmod is None:
            _warnings.warn("HMAC() without an explicit digestmod argument "
                           "is deprecated.", PendingDeprecationWarning, 2)
            digestmod = _hashlib.md5

        # Normalise the three accepted digestmod forms into a single
        # constructor taking optional initial data.
        if callable(digestmod):
            self.digest_cons = digestmod
        elif isinstance(digestmod, str):
            self.digest_cons = lambda d=b'': _hashlib.new(digestmod, d)
        else:
            self.digest_cons = lambda d=b'': digestmod.new(d)

        self.outer = self.digest_cons()
        self.inner = self.digest_cons()
        self.digest_size = self.inner.digest_size

        if hasattr(self.inner, 'block_size'):
            blocksize = self.inner.block_size
            if blocksize < 16:
                _warnings.warn('block_size of %d seems too small; using our '
                               'default of %d.' % (blocksize, self.blocksize),
                               RuntimeWarning, 2)
                blocksize = self.blocksize
        else:
            _warnings.warn('No block_size attribute on given digest object; '
                           'Assuming %d.' % (self.blocksize),
                           RuntimeWarning, 2)
            blocksize = self.blocksize

        # self.blocksize is the default blocksize. self.block_size is
        # effective block size as well as the public API attribute.
        self.block_size = blocksize

        # Keys longer than one block are hashed first; the result is
        # then zero-padded to exactly one block.
        if len(key) > blocksize:
            key = self.digest_cons(key).digest()

        key = key.ljust(blocksize, b'\0')
        self.outer.update(key.translate(trans_5C))
        self.inner.update(key.translate(trans_36))
        if msg is not None:
            self.update(msg)

    @property
    def name(self):
        """Name of this HMAC: "hmac-" followed by the inner hash's name."""
        return "hmac-" + self.inner.name

    def update(self, msg):
        """Update this hashing object with the string msg.
        """
        self.inner.update(msg)

    def copy(self):
        """Return a separate copy of this hashing object.

        An update to this copy won't affect the original object.
        """
        # Call __new__ directly to avoid the expensive __init__.
        other = self.__class__.__new__(self.__class__)
        other.digest_cons = self.digest_cons
        other.digest_size = self.digest_size
        # block_size is a public attribute set by __init__; carry it over
        # so that copies expose the same API as the original.
        other.block_size = self.block_size
        other.inner = self.inner.copy()
        other.outer = self.outer.copy()
        return other

    def _current(self):
        """Return a hash object for the current state.

        To be used only internally with digest() and hexdigest().
        """
        h = self.outer.copy()
        h.update(self.inner.digest())
        return h

    def digest(self):
        """Return the hash value of this hashing object.

        This returns a string containing 8-bit data.  The object is
        not altered in any way by this function; you can continue
        updating the object after calling this function.
        """
        h = self._current()
        return h.digest()

    def hexdigest(self):
        """Like digest(), but returns a string of hexadecimal digits instead.
        """
        h = self._current()
        return h.hexdigest()
|
||||
|
||||
def new(key, msg = None, digestmod = None):
    """Create a new hashing object and return it.

    key: The starting key for the hash.
    msg: if available, will immediately be hashed into the object's starting
    state.
    digestmod: forwarded unchanged to the HMAC constructor.

    You can now feed arbitrary strings into the object using its update()
    method, and can ask for the hash value at any time by calling its digest()
    method.
    """
    return HMAC(key, msg, digestmod)
|
||||
132
Lib/html/__init__.py
Normal file
132
Lib/html/__init__.py
Normal file
@@ -0,0 +1,132 @@
|
||||
"""
|
||||
General functions for HTML manipulation.
|
||||
"""
|
||||
|
||||
import re as _re
|
||||
from html.entities import html5 as _html5
|
||||
|
||||
|
||||
__all__ = ['escape', 'unescape']
|
||||
|
||||
|
||||
def escape(s, quote=True):
    """
    Replace special characters "&", "<" and ">" to HTML-safe sequences.
    If the optional flag quote is true (the default), the quotation mark
    characters, both double quote (") and single quote (') characters are also
    translated.
    """
    # The replacement targets must be the entity spellings; a literal
    # "&" -> "&" replacement would leave the text unescaped.
    s = s.replace("&", "&amp;") # Must be done first!
    s = s.replace("<", "&lt;")
    s = s.replace(">", "&gt;")
    if quote:
        s = s.replace('"', "&quot;")
        s = s.replace('\'', "&#x27;")
    return s
|
||||
|
||||
|
||||
# see http://www.w3.org/TR/html5/syntax.html#tokenizing-character-references
|
||||
|
||||
# Numeric character references that are replaced by a fixed character
# instead of chr(num); consulted first by _replace_charref().
_invalid_charrefs = {
    0x00: '\ufffd',  # REPLACEMENT CHARACTER
    0x0d: '\r',      # CARRIAGE RETURN
    0x80: '\u20ac',  # EURO SIGN
    0x81: '\x81',    # <control>
    0x82: '\u201a',  # SINGLE LOW-9 QUOTATION MARK
    0x83: '\u0192',  # LATIN SMALL LETTER F WITH HOOK
    0x84: '\u201e',  # DOUBLE LOW-9 QUOTATION MARK
    0x85: '\u2026',  # HORIZONTAL ELLIPSIS
    0x86: '\u2020',  # DAGGER
    0x87: '\u2021',  # DOUBLE DAGGER
    0x88: '\u02c6',  # MODIFIER LETTER CIRCUMFLEX ACCENT
    0x89: '\u2030',  # PER MILLE SIGN
    0x8a: '\u0160',  # LATIN CAPITAL LETTER S WITH CARON
    0x8b: '\u2039',  # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    0x8c: '\u0152',  # LATIN CAPITAL LIGATURE OE
    0x8d: '\x8d',    # <control>
    0x8e: '\u017d',  # LATIN CAPITAL LETTER Z WITH CARON
    0x8f: '\x8f',    # <control>
    0x90: '\x90',    # <control>
    0x91: '\u2018',  # LEFT SINGLE QUOTATION MARK
    0x92: '\u2019',  # RIGHT SINGLE QUOTATION MARK
    0x93: '\u201c',  # LEFT DOUBLE QUOTATION MARK
    0x94: '\u201d',  # RIGHT DOUBLE QUOTATION MARK
    0x95: '\u2022',  # BULLET
    0x96: '\u2013',  # EN DASH
    0x97: '\u2014',  # EM DASH
    0x98: '\u02dc',  # SMALL TILDE
    0x99: '\u2122',  # TRADE MARK SIGN
    0x9a: '\u0161',  # LATIN SMALL LETTER S WITH CARON
    0x9b: '\u203a',  # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    0x9c: '\u0153',  # LATIN SMALL LIGATURE OE
    0x9d: '\x9d',    # <control>
    0x9e: '\u017e',  # LATIN SMALL LETTER Z WITH CARON
    0x9f: '\u0178',  # LATIN CAPITAL LETTER Y WITH DIAERESIS
}
|
||||
|
||||
# Code points for which _replace_charref() returns the empty string.
_invalid_codepoints = {
    # 0x0001 to 0x0008
    0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
    # 0x000E to 0x001F
    0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
    0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    # 0x007F to 0x009F
    0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a,
    0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    # 0xFDD0 to 0xFDEF
    0xfdd0, 0xfdd1, 0xfdd2, 0xfdd3, 0xfdd4, 0xfdd5, 0xfdd6, 0xfdd7, 0xfdd8,
    0xfdd9, 0xfdda, 0xfddb, 0xfddc, 0xfddd, 0xfdde, 0xfddf, 0xfde0, 0xfde1,
    0xfde2, 0xfde3, 0xfde4, 0xfde5, 0xfde6, 0xfde7, 0xfde8, 0xfde9, 0xfdea,
    0xfdeb, 0xfdec, 0xfded, 0xfdee, 0xfdef,
    # others
    0xb, 0xfffe, 0xffff, 0x1fffe, 0x1ffff, 0x2fffe, 0x2ffff, 0x3fffe, 0x3ffff,
    0x4fffe, 0x4ffff, 0x5fffe, 0x5ffff, 0x6fffe, 0x6ffff, 0x7fffe, 0x7ffff,
    0x8fffe, 0x8ffff, 0x9fffe, 0x9ffff, 0xafffe, 0xaffff, 0xbfffe, 0xbffff,
    0xcfffe, 0xcffff, 0xdfffe, 0xdffff, 0xefffe, 0xeffff, 0xffffe, 0xfffff,
    0x10fffe, 0x10ffff
}
|
||||
|
||||
|
||||
def _replace_charref(s):
    """Return the replacement text for one character-reference match.

    s is a regex match object whose group(1) is the reference without
    its leading "&" (for example "#62;", "#x3e" or "gt;").
    """
    s = s.group(1)
    if s[0] == '#':
        # numeric charref
        if s[1] in 'xX':
            num = int(s[2:].rstrip(';'), 16)
        else:
            num = int(s[1:].rstrip(';'))
        if num in _invalid_charrefs:
            return _invalid_charrefs[num]
        # Surrogates and out-of-range values become U+FFFD.
        if 0xD800 <= num <= 0xDFFF or num > 0x10FFFF:
            return '\uFFFD'
        if num in _invalid_codepoints:
            return ''
        return chr(num)
    else:
        # named charref
        if s in _html5:
            return _html5[s]
        # find the longest matching name (as defined by the standard)
        for x in range(len(s)-1, 1, -1):
            if s[:x] in _html5:
                return _html5[s[:x]] + s[x:]
        else:
            # No known prefix: emit the reference text unchanged.
            return '&' + s
|
||||
|
||||
|
||||
# Matches a decimal, hexadecimal or named character reference; group(1)
# is the reference without the leading "&".  The ";" is optional.
_charref = _re.compile(r'&(#[0-9]+;?'
                       r'|#[xX][0-9a-fA-F]+;?'
                       r'|[^\t\n\f <&#;]{1,32};?)')
|
||||
|
||||
def unescape(s):
    """
    Convert all named and numeric character references (e.g. &gt;, &#62;,
    &#x3e;) in the string s to the corresponding unicode characters.
    This function uses the rules defined by the HTML 5 standard
    for both valid and invalid character references, and the list of
    HTML 5 named character references defined in html.entities.html5.
    """
    # Fast path: without an ampersand there is nothing to convert.
    if '&' not in s:
        return s
    return _charref.sub(_replace_charref, s)
|
||||
2509
Lib/html/entities.py
Normal file
2509
Lib/html/entities.py
Normal file
File diff suppressed because it is too large
Load Diff
470
Lib/html/parser.py
Normal file
470
Lib/html/parser.py
Normal file
@@ -0,0 +1,470 @@
|
||||
"""A parser for HTML and XHTML."""
|
||||
|
||||
# This file is based on sgmllib.py, but the API is slightly different.
|
||||
|
||||
# XXX There should be a way to distinguish between PCDATA (parsed
|
||||
# character data -- the normal case), RCDATA (replaceable character
|
||||
# data -- only char and entity references and end tags are special)
|
||||
# and CDATA (character data -- only end tags are special).
|
||||
|
||||
|
||||
import re
|
||||
import warnings
|
||||
import _markupbase
|
||||
|
||||
from html import unescape
|
||||
|
||||
|
||||
__all__ = ['HTMLParser']
|
||||
|
||||
# Regular expressions used for parsing
|
||||
|
||||
interesting_normal = re.compile('[&<]')
|
||||
incomplete = re.compile('&[a-zA-Z#]')
|
||||
|
||||
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
|
||||
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
|
||||
|
||||
starttagopen = re.compile('<[a-zA-Z]')
|
||||
piclose = re.compile('>')
|
||||
commentclose = re.compile(r'--\s*>')
|
||||
# Note:
|
||||
# 1) if you change tagfind/attrfind remember to update locatestarttagend too;
|
||||
# 2) if you change tagfind/attrfind and/or locatestarttagend the parser will
|
||||
# explode, so don't do it.
|
||||
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
|
||||
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
|
||||
tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*')
|
||||
attrfind_tolerant = re.compile(
|
||||
r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
|
||||
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
|
||||
locatestarttagend_tolerant = re.compile(r"""
|
||||
<[a-zA-Z][^\t\n\r\f />\x00]* # tag name
|
||||
(?:[\s/]* # optional whitespace before attribute name
|
||||
(?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
|
||||
(?:\s*=+\s* # value indicator
|
||||
(?:'[^']*' # LITA-enclosed value
|
||||
|"[^"]*" # LIT-enclosed value
|
||||
|(?!['"])[^>\s]* # bare value
|
||||
)
|
||||
(?:\s*,)* # possibly followed by a comma
|
||||
)?(?:\s|/(?!>))*
|
||||
)*
|
||||
)?
|
||||
\s* # trailing whitespace
|
||||
""", re.VERBOSE)
|
||||
endendtag = re.compile('>')
|
||||
# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
|
||||
# </ and the tag name, so maybe this should be fixed
|
||||
endtagfind = re.compile(r'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
|
||||
|
||||
|
||||
|
||||
class HTMLParser(_markupbase.ParserBase):
|
||||
"""Find tags and other markup and call handler functions.
|
||||
|
||||
Usage:
|
||||
p = HTMLParser()
|
||||
p.feed(data)
|
||||
...
|
||||
p.close()
|
||||
|
||||
Start tags are handled by calling self.handle_starttag() or
|
||||
self.handle_startendtag(); end tags by self.handle_endtag(). The
|
||||
data between tags is passed from the parser to the derived class
|
||||
by calling self.handle_data() with the data as argument (the data
|
||||
may be split up in arbitrary chunks). If convert_charrefs is
|
||||
True the character references are converted automatically to the
|
||||
corresponding Unicode character (and self.handle_data() is no
|
||||
longer split in chunks), otherwise they are passed by calling
|
||||
self.handle_entityref() or self.handle_charref() with the string
|
||||
containing respectively the named or numeric reference as the
|
||||
argument.
|
||||
"""
|
||||
|
||||
CDATA_CONTENT_ELEMENTS = ("script", "style")
|
||||
|
||||
def __init__(self, *, convert_charrefs=True):
    """Initialize and reset this instance.

    If convert_charrefs is True (the default), all character references
    are automatically converted to the corresponding Unicode characters.
    """
    self.convert_charrefs = convert_charrefs
    self.reset()
|
||||
|
||||
def reset(self):
    """Reset this instance.  Loses all unprocessed data."""
    self.rawdata = ''
    self.lasttag = '???'
    self.interesting = interesting_normal
    self.cdata_elem = None
    _markupbase.ParserBase.reset(self)
|
||||
|
||||
def feed(self, data):
    r"""Feed data to the parser.

    Call this as often as you want, with as little or as much text
    as you want (may include '\n').
    """
    # Append to whatever is still unprocessed, then parse as far as
    # possible without treating the end of the buffer as EOF.
    self.rawdata = self.rawdata + data
    self.goahead(0)
|
||||
|
||||
def close(self):
    """Handle any buffered data."""
    # A true "end" flag makes goahead() treat the buffer as complete.
    self.goahead(1)
|
||||
|
||||
__starttag_text = None
|
||||
|
||||
def get_starttag_text(self):
    """Return full source of start tag: '<...>'."""
    return self.__starttag_text
|
||||
|
||||
def set_cdata_mode(self, elem):
    """Enter CDATA mode for element *elem*.

    Records the lower-cased element name and makes ``self.interesting``
    match only that element's end tag (case-insensitively).
    """
    self.cdata_elem = elem.lower()
    self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
|
||||
|
||||
def clear_cdata_mode(self):
    """Leave CDATA mode: restore the normal pattern and clear cdata_elem."""
    self.interesting = interesting_normal
    self.cdata_elem = None
|
||||
|
||||
# Internal -- handle data as far as reasonable. May leave state
|
||||
# and data to be processed by a subsequent call. If 'end' is
|
||||
# true, force handling all data as if followed by EOF marker.
|
||||
def goahead(self, end):
    """Parse as much of self.rawdata as possible.

    Dispatches to the parse_* and handle_* methods and finally stores
    the unconsumed remainder back into self.rawdata.  If *end* is true,
    all remaining data is handled as if followed by an EOF marker.
    """
    rawdata = self.rawdata
    i = 0
    n = len(rawdata)
    while i < n:
        if self.convert_charrefs and not self.cdata_elem:
            j = rawdata.find('<', i)
            if j < 0:
                # if we can't find the next <, either we are at the end
                # or there's more text incoming.  If the latter is True,
                # we can't pass the text to handle_data in case we have
                # a charref cut in half at end.  Try to determine if
                # this is the case before proceeding by looking for an
                # & near the end and see if it's followed by a space or ;.
                amppos = rawdata.rfind('&', max(i, n-34))
                if (amppos >= 0 and
                    not re.compile(r'[\s;]').search(rawdata, amppos)):
                    break  # wait till we get all the text
                j = n
        else:
            match = self.interesting.search(rawdata, i) # < or &
            if match:
                j = match.start()
            else:
                if self.cdata_elem:
                    break
                j = n
        if i < j:
            if self.convert_charrefs and not self.cdata_elem:
                self.handle_data(unescape(rawdata[i:j]))
            else:
                self.handle_data(rawdata[i:j])
        i = self.updatepos(i, j)
        if i == n: break
        startswith = rawdata.startswith
        if startswith('<', i):
            if starttagopen.match(rawdata, i): # < + letter
                k = self.parse_starttag(i)
            elif startswith("</", i):
                k = self.parse_endtag(i)
            elif startswith("<!--", i):
                k = self.parse_comment(i)
            elif startswith("<?", i):
                k = self.parse_pi(i)
            elif startswith("<!", i):
                k = self.parse_html_declaration(i)
            elif (i + 1) < n:
                self.handle_data("<")
                k = i + 1
            else:
                break
            if k < 0:
                # The construct is not terminated in the buffer.
                if not end:
                    break
                # At EOF: emit the unterminated markup as plain data.
                k = rawdata.find('>', i + 1)
                if k < 0:
                    k = rawdata.find('<', i + 1)
                    if k < 0:
                        k = i + 1
                else:
                    k += 1
                if self.convert_charrefs and not self.cdata_elem:
                    self.handle_data(unescape(rawdata[i:k]))
                else:
                    self.handle_data(rawdata[i:k])
            i = self.updatepos(i, k)
        elif startswith("&#", i):
            match = charref.match(rawdata, i)
            if match:
                name = match.group()[2:-1]
                self.handle_charref(name)
                k = match.end()
                # The pattern consumes one terminating character; give it
                # back unless it is the ";" that belongs to the reference.
                if not startswith(';', k-1):
                    k = k - 1
                i = self.updatepos(i, k)
                continue
            else:
                if ";" in rawdata[i:]:  # bail by consuming &#
                    self.handle_data(rawdata[i:i+2])
                    i = self.updatepos(i, i+2)
                break
        elif startswith('&', i):
            match = entityref.match(rawdata, i)
            if match:
                name = match.group(1)
                self.handle_entityref(name)
                k = match.end()
                if not startswith(';', k-1):
                    k = k - 1
                i = self.updatepos(i, k)
                continue
            match = incomplete.match(rawdata, i)
            if match:
                # match.group() will contain at least 2 chars
                if end and match.group() == rawdata[i:]:
                    i = self.updatepos(i, i + 1)
                # incomplete
                break
            elif (i + 1) < n:
                # not the end of the buffer, and can't be confused
                # with some other construct
                self.handle_data("&")
                i = self.updatepos(i, i + 1)
            else:
                break
        else:
            assert 0, "interesting.search() lied"
    # end while
    if end and i < n and not self.cdata_elem:
        if self.convert_charrefs and not self.cdata_elem:
            self.handle_data(unescape(rawdata[i:n]))
        else:
            self.handle_data(rawdata[i:n])
        i = self.updatepos(i, n)
    self.rawdata = rawdata[i:]
|
||||
|
||||
# Internal -- parse html declarations, return length or -1 if not terminated
|
||||
# See w3.org/TR/html5/tokenization.html#markup-declaration-open-state
|
||||
# See also parse_declaration in _markupbase
|
||||
def parse_html_declaration(self, i):
    """Parse the '<!...' construct starting at index i of self.rawdata.

    Comments, marked sections ('<![') and everything that is not a
    doctype are delegated to the corresponding parse_* method.  For a
    doctype, handle_decl() receives the text between '<!' and '>'.

    Returns the index just past the construct, or -1 if the doctype is
    not terminated.
    """
    rawdata = self.rawdata
    assert rawdata[i:i+2] == '<!', ('unexpected call to '
                                    'parse_html_declaration()')
    if rawdata[i:i+4] == '<!--':
        # this case is actually already handled in goahead()
        return self.parse_comment(i)
    elif rawdata[i:i+3] == '<![':
        return self.parse_marked_section(i)
    elif rawdata[i:i+9].lower() == '<!doctype':
        # find the closing >
        gtpos = rawdata.find('>', i+9)
        if gtpos == -1:
            return -1
        self.handle_decl(rawdata[i+2:gtpos])
        return gtpos+1
    else:
        return self.parse_bogus_comment(i)
|
||||
|
||||
# Internal -- parse bogus comment, return length or -1 if not terminated
|
||||
# see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
|
||||
def parse_bogus_comment(self, i, report=1):
    """Parse a bogus comment starting with '<!' or '</' at index i.

    Everything up to the next '>' is the comment body; when *report*
    is true it is passed to handle_comment().

    Returns the index just past the '>', or -1 if there is no '>'.
    """
    rawdata = self.rawdata
    assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to '
                                            'parse_bogus_comment()')
    pos = rawdata.find('>', i+2)
    if pos == -1:
        return -1
    if report:
        self.handle_comment(rawdata[i+2:pos])
    return pos + 1
|
||||
|
||||
# Internal -- parse processing instr, return end or -1 if not terminated
|
||||
def parse_pi(self, i):
    """Parse the processing instruction starting with '<?' at index i.

    The text between '<?' and the next '>' is passed to handle_pi().

    Returns the index just past the '>', or -1 if there is no '>'.
    """
    rawdata = self.rawdata
    assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
    match = piclose.search(rawdata, i+2) # >
    if not match:
        return -1
    j = match.start()
    self.handle_pi(rawdata[i+2: j])
    j = match.end()
    return j
|
||||
|
||||
# Internal -- handle starttag, return end or -1 if not terminated
|
||||
def parse_starttag(self, i):
    """Parse the start tag beginning at index i of self.rawdata.

    On success the tag name and a list of (name, value) attribute
    pairs are passed to handle_starttag() or, for a tag ending in '/>',
    to handle_startendtag().  Attribute names and the tag name are
    lower-cased; quoted values lose their quotes and are unescaped;
    attributes without a value get None.

    Returns the index just past the tag, or a negative number if the
    tag is not yet complete in the buffer.
    """
    self.__starttag_text = None
    endpos = self.check_for_whole_start_tag(i)
    if endpos < 0:
        return endpos
    rawdata = self.rawdata
    self.__starttag_text = rawdata[i:endpos]

    # Now parse the data between i+1 and j into a tag and attrs
    attrs = []
    match = tagfind_tolerant.match(rawdata, i+1)
    assert match, 'unexpected call to parse_starttag()'
    k = match.end()
    self.lasttag = tag = match.group(1).lower()
    while k < endpos:
        m = attrfind_tolerant.match(rawdata, k)
        if not m:
            break
        attrname, rest, attrvalue = m.group(1, 2, 3)
        if not rest:
            attrvalue = None
        elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
             attrvalue[:1] == '"' == attrvalue[-1:]:
            attrvalue = attrvalue[1:-1]
        if attrvalue:
            attrvalue = unescape(attrvalue)
        attrs.append((attrname.lower(), attrvalue))
        k = m.end()

    end = rawdata[k:endpos].strip()
    if end not in (">", "/>"):
        # Junk after the attributes: hand the whole tag over as data.
        lineno, offset = self.getpos()
        if "\n" in self.__starttag_text:
            lineno = lineno + self.__starttag_text.count("\n")
            offset = len(self.__starttag_text) \
                     - self.__starttag_text.rfind("\n")
        else:
            offset = offset + len(self.__starttag_text)
        self.handle_data(rawdata[i:endpos])
        return endpos
    if end.endswith('/>'):
        # XHTML-style empty tag: <span attr="value" />
        self.handle_startendtag(tag, attrs)
    else:
        self.handle_starttag(tag, attrs)
        if tag in self.CDATA_CONTENT_ELEMENTS:
            self.set_cdata_mode(tag)
    return endpos
|
||||
|
||||
# Internal -- check to see if we have a complete starttag; return end
|
||||
# or -1 if incomplete.
|
||||
def check_for_whole_start_tag(self, i):
    """Return the end index of the start tag at ``rawdata[i]``, or -1.

    -1 means the tag is incomplete in the current buffer.  For input that
    cannot be a well-formed tag, an index greater than ``i`` is returned
    so the caller always makes progress.

    Raises AssertionError if the start-tag pattern does not match at ``i``.
    """
    rawdata = self.rawdata
    m = locatestarttagend_tolerant.match(rawdata, i)
    if not m:
        raise AssertionError("we should not get here!")
    j = m.end()
    following = rawdata[j:j+1]
    if following == ">":
        return j + 1
    if following == "/":
        if rawdata.startswith("/>", j):
            return j + 2
        # A lone "/" at this point: buffer boundary.  (The old fallback
        # after this check could never run, because the character at j
        # is already known to be "/".)
        return -1
    if following == "":
        # end of input
        return -1
    if following in ("abcdefghijklmnopqrstuvwxyz=/"
                     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
        # end of input in or before attribute value, or we have the
        # '/' from a '/>' ending
        return -1
    # Bogus input: skip at least one character.
    if j > i:
        return j
    return i + 1
|
||||
|
||||
# Internal -- parse endtag, return end or -1 if incomplete
|
||||
def parse_endtag(self, i):
    """Parse the end tag starting at ``rawdata[i]`` (which must be ``</``).

    Returns the index just past the consumed text, or -1 when no closing
    ``>`` is available yet.
    """
    text = self.rawdata
    assert text[i:i+2] == "</", "unexpected call to parse_endtag"
    closing = endendtag.search(text, i+1)  # >
    if not closing:
        return -1
    gtpos = closing.end()
    strict = endtagfind.match(text, i)  # </ + tag + >

    if strict:
        elem = strict.group(1).lower()  # script or style
        if self.cdata_elem is not None and elem != self.cdata_elem:
            # Inside CDATA content only the matching end tag counts.
            self.handle_data(text[i:gtpos])
            return gtpos
        self.handle_endtag(elem)
        self.clear_cdata_mode()
        return gtpos

    # Not a strictly well-formed end tag.
    if self.cdata_elem is not None:
        self.handle_data(text[i:gtpos])
        return gtpos
    # find the name: w3.org/TR/html5/tokenization.html#tag-name-state
    name = tagfind_tolerant.match(text, i+2)
    if name:
        tagname = name.group(1).lower()
        # Consume and ignore whatever lies between the name and the next
        # '>'.  This is not 100% correct, since input such as
        # </tag attr=">"> exists, but looking for '>' after the name
        # covers most cases and is much simpler.
        gtpos = text.find('>', name.end())
        self.handle_endtag(tagname)
        return gtpos + 1
    # w3.org/TR/html5/tokenization.html#end-tag-open-state
    if text[i:i+3] == '</>':
        return i + 3
    return self.parse_bogus_comment(i)
|
||||
|
||||
# Overridable -- finish processing of start+end tag: <tag.../>
|
||||
def handle_startendtag(self, tag, attrs):
    """Handle a ``<tag .../>`` element as a start tag followed by an end tag."""
    self.handle_starttag(tag, attrs)
    self.handle_endtag(tag)
|
||||
|
||||
# Overridable -- handle start tag
|
||||
def handle_starttag(self, tag, attrs):
    """Hook for a start tag; does nothing unless overridden."""
|
||||
|
||||
# Overridable -- handle end tag
|
||||
def handle_endtag(self, tag):
    """Hook for an end tag; does nothing unless overridden."""
|
||||
|
||||
# Overridable -- handle character reference
|
||||
def handle_charref(self, name):
    """Hook for a character reference; does nothing unless overridden."""
|
||||
|
||||
# Overridable -- handle entity reference
|
||||
def handle_entityref(self, name):
    """Hook for an entity reference; does nothing unless overridden."""
|
||||
|
||||
# Overridable -- handle data
|
||||
def handle_data(self, data):
    """Hook for character data; does nothing unless overridden."""
|
||||
|
||||
# Overridable -- handle comment
|
||||
def handle_comment(self, data):
    """Hook for a comment; does nothing unless overridden."""
|
||||
|
||||
# Overridable -- handle declaration
|
||||
def handle_decl(self, decl):
    """Hook for a declaration; does nothing unless overridden."""
|
||||
|
||||
# Overridable -- handle processing instruction
|
||||
def handle_pi(self, data):
    """Hook for a processing instruction; does nothing unless overridden."""
|
||||
|
||||
def unknown_decl(self, data):
    """Hook for an unrecognized declaration; does nothing unless overridden."""
|
||||
|
||||
# Internal -- helper to remove special character quoting
|
||||
def unescape(self, s):
    """Deprecated method form of the module-level ``unescape`` function.

    Emits a DeprecationWarning and returns ``unescape(s)``.
    """
    message = ('The unescape method is deprecated and will be removed '
               'in 3.5, use html.unescape() instead.')
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return unescape(s)
|
||||
134
Lib/http/__init__.py
Normal file
134
Lib/http/__init__.py
Normal file
@@ -0,0 +1,134 @@
|
||||
from enum import IntEnum
|
||||
|
||||
__all__ = ['HTTPStatus']
|
||||
|
||||
class HTTPStatus(IntEnum):
    """HTTP status codes and reason phrases

    Status codes from the following RFCs are all observed:

        * RFC 7231: Hypertext Transfer Protocol (HTTP/1.1), obsoletes 2616
        * RFC 6585: Additional HTTP Status Codes
        * RFC 3229: Delta encoding in HTTP
        * RFC 4918: HTTP Extensions for WebDAV, obsoletes 2518
        * RFC 5842: Binding Extensions to WebDAV
        * RFC 7238: Permanent Redirect
        * RFC 2295: Transparent Content Negotiation in HTTP
        * RFC 2774: An HTTP Extension Framework

    Each member is an int equal to its status code and additionally carries
    ``phrase`` (the reason phrase) and ``description`` (a longer explanation,
    '' when none is given).
    """
    def __new__(cls, value, phrase, description=''):
        # Members are declared as (code, phrase[, description]) tuples; only
        # the code becomes the enum value, the rest are plain attributes.
        obj = int.__new__(cls, value)
        obj._value_ = value

        obj.phrase = phrase
        obj.description = description
        return obj

    # informational
    CONTINUE = 100, 'Continue', 'Request received, please continue'
    SWITCHING_PROTOCOLS = (101, 'Switching Protocols',
            'Switching to new protocol; obey Upgrade header')
    PROCESSING = 102, 'Processing'

    # success
    OK = 200, 'OK', 'Request fulfilled, document follows'
    CREATED = 201, 'Created', 'Document created, URL follows'
    ACCEPTED = (202, 'Accepted',
        'Request accepted, processing continues off-line')
    NON_AUTHORITATIVE_INFORMATION = (203,
        'Non-Authoritative Information', 'Request fulfilled from cache')
    NO_CONTENT = 204, 'No Content', 'Request fulfilled, nothing follows'
    RESET_CONTENT = 205, 'Reset Content', 'Clear input form for further input'
    PARTIAL_CONTENT = 206, 'Partial Content', 'Partial content follows'
    MULTI_STATUS = 207, 'Multi-Status'
    ALREADY_REPORTED = 208, 'Already Reported'
    IM_USED = 226, 'IM Used'

    # redirection
    MULTIPLE_CHOICES = (300, 'Multiple Choices',
        'Object has several resources -- see URI list')
    MOVED_PERMANENTLY = (301, 'Moved Permanently',
        'Object moved permanently -- see URI list')
    FOUND = 302, 'Found', 'Object moved temporarily -- see URI list'
    SEE_OTHER = 303, 'See Other', 'Object moved -- see Method and URL list'
    NOT_MODIFIED = (304, 'Not Modified',
        'Document has not changed since given time')
    USE_PROXY = (305, 'Use Proxy',
        'You must use proxy specified in Location to access this resource')
    TEMPORARY_REDIRECT = (307, 'Temporary Redirect',
        'Object moved temporarily -- see URI list')
    # 308 is the permanent counterpart of 307; its description used to be a
    # copy of the "temporarily" text.
    PERMANENT_REDIRECT = (308, 'Permanent Redirect',
        'Object moved permanently -- see URI list')

    # client error
    BAD_REQUEST = (400, 'Bad Request',
        'Bad request syntax or unsupported method')
    UNAUTHORIZED = (401, 'Unauthorized',
        'No permission -- see authorization schemes')
    PAYMENT_REQUIRED = (402, 'Payment Required',
        'No payment -- see charging schemes')
    FORBIDDEN = (403, 'Forbidden',
        'Request forbidden -- authorization will not help')
    NOT_FOUND = (404, 'Not Found',
        'Nothing matches the given URI')
    METHOD_NOT_ALLOWED = (405, 'Method Not Allowed',
        'Specified method is invalid for this resource')
    NOT_ACCEPTABLE = (406, 'Not Acceptable',
        'URI not available in preferred format')
    PROXY_AUTHENTICATION_REQUIRED = (407,
        'Proxy Authentication Required',
        'You must authenticate with this proxy before proceeding')
    REQUEST_TIMEOUT = (408, 'Request Timeout',
        'Request timed out; try again later')
    CONFLICT = 409, 'Conflict', 'Request conflict'
    GONE = (410, 'Gone',
        'URI no longer exists and has been permanently removed')
    LENGTH_REQUIRED = (411, 'Length Required',
        'Client must specify Content-Length')
    PRECONDITION_FAILED = (412, 'Precondition Failed',
        'Precondition in headers is false')
    REQUEST_ENTITY_TOO_LARGE = (413, 'Request Entity Too Large',
        'Entity is too large')
    REQUEST_URI_TOO_LONG = (414, 'Request-URI Too Long',
        'URI is too long')
    UNSUPPORTED_MEDIA_TYPE = (415, 'Unsupported Media Type',
        'Entity body in unsupported format')
    REQUESTED_RANGE_NOT_SATISFIABLE = (416,
        'Requested Range Not Satisfiable',
        'Cannot satisfy request range')
    EXPECTATION_FAILED = (417, 'Expectation Failed',
        'Expect condition could not be satisfied')
    UNPROCESSABLE_ENTITY = 422, 'Unprocessable Entity'
    LOCKED = 423, 'Locked'
    FAILED_DEPENDENCY = 424, 'Failed Dependency'
    UPGRADE_REQUIRED = 426, 'Upgrade Required'
    PRECONDITION_REQUIRED = (428, 'Precondition Required',
        'The origin server requires the request to be conditional')
    TOO_MANY_REQUESTS = (429, 'Too Many Requests',
        'The user has sent too many requests in '
        'a given amount of time ("rate limiting")')
    REQUEST_HEADER_FIELDS_TOO_LARGE = (431,
        'Request Header Fields Too Large',
        'The server is unwilling to process the request because its header '
        'fields are too large')

    # server errors
    INTERNAL_SERVER_ERROR = (500, 'Internal Server Error',
        'Server got itself in trouble')
    NOT_IMPLEMENTED = (501, 'Not Implemented',
        'Server does not support this operation')
    BAD_GATEWAY = (502, 'Bad Gateway',
        'Invalid responses from another server/proxy')
    SERVICE_UNAVAILABLE = (503, 'Service Unavailable',
        'The server cannot process the request due to a high load')
    GATEWAY_TIMEOUT = (504, 'Gateway Timeout',
        'The gateway server did not receive a timely response')
    HTTP_VERSION_NOT_SUPPORTED = (505, 'HTTP Version Not Supported',
        'Cannot fulfill request')
    VARIANT_ALSO_NEGOTIATES = 506, 'Variant Also Negotiates'
    INSUFFICIENT_STORAGE = 507, 'Insufficient Storage'
    LOOP_DETECTED = 508, 'Loop Detected'
    NOT_EXTENDED = 510, 'Not Extended'
    NETWORK_AUTHENTICATION_REQUIRED = (511,
        'Network Authentication Required',
        'The client needs to authenticate to gain network access')
|
||||
1478
Lib/http/client.py
Normal file
1478
Lib/http/client.py
Normal file
File diff suppressed because it is too large
Load Diff
2098
Lib/http/cookiejar.py
Normal file
2098
Lib/http/cookiejar.py
Normal file
File diff suppressed because it is too large
Load Diff
635
Lib/http/cookies.py
Normal file
635
Lib/http/cookies.py
Normal file
@@ -0,0 +1,635 @@
|
||||
####
|
||||
# Copyright 2000 by Timothy O'Malley <timo@alum.mit.edu>
|
||||
#
|
||||
# All Rights Reserved
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software
|
||||
# and its documentation for any purpose and without fee is hereby
|
||||
# granted, provided that the above copyright notice appear in all
|
||||
# copies and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of
|
||||
# Timothy O'Malley not be used in advertising or publicity
|
||||
# pertaining to distribution of the software without specific, written
|
||||
# prior permission.
|
||||
#
|
||||
# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
|
||||
# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
# PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
####
|
||||
#
|
||||
# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp
|
||||
# by Timothy O'Malley <timo@alum.mit.edu>
|
||||
#
|
||||
# Cookie.py is a Python module for the handling of HTTP
|
||||
# cookies as a Python dictionary. See RFC 2109 for more
|
||||
# information on cookies.
|
||||
#
|
||||
# The original idea to treat Cookies as a dictionary came from
|
||||
# Dave Mitchell (davem@magnet.com) in 1995, when he released the
|
||||
# first version of nscookie.py.
|
||||
#
|
||||
####
|
||||
|
||||
r"""
|
||||
Here's a sample session to show how to use this module.
|
||||
At the moment, this is the only documentation.
|
||||
|
||||
The Basics
|
||||
----------
|
||||
|
||||
Importing is easy...
|
||||
|
||||
>>> from http import cookies
|
||||
|
||||
Most of the time you start by creating a cookie.
|
||||
|
||||
>>> C = cookies.SimpleCookie()
|
||||
|
||||
Once you've created your Cookie, you can add values just as if it were
|
||||
a dictionary.
|
||||
|
||||
>>> C = cookies.SimpleCookie()
|
||||
>>> C["fig"] = "newton"
|
||||
>>> C["sugar"] = "wafer"
|
||||
>>> C.output()
|
||||
'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer'
|
||||
|
||||
Notice that the printable representation of a Cookie is the
|
||||
appropriate format for a Set-Cookie: header. This is the
|
||||
default behavior. You can change the header and printed
|
||||
attributes by using the .output() function
|
||||
|
||||
>>> C = cookies.SimpleCookie()
|
||||
>>> C["rocky"] = "road"
|
||||
>>> C["rocky"]["path"] = "/cookie"
|
||||
>>> print(C.output(header="Cookie:"))
|
||||
Cookie: rocky=road; Path=/cookie
|
||||
>>> print(C.output(attrs=[], header="Cookie:"))
|
||||
Cookie: rocky=road
|
||||
|
||||
The load() method of a Cookie extracts cookies from a string. In a
|
||||
CGI script, you would use this method to extract the cookies from the
|
||||
HTTP_COOKIE environment variable.
|
||||
|
||||
>>> C = cookies.SimpleCookie()
|
||||
>>> C.load("chips=ahoy; vienna=finger")
|
||||
>>> C.output()
|
||||
'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger'
|
||||
|
||||
The load() method is darn-tootin smart about identifying cookies
|
||||
within a string. Escaped quotation marks, nested semicolons, and other
|
||||
such trickeries do not confuse it.
|
||||
|
||||
>>> C = cookies.SimpleCookie()
|
||||
>>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";')
|
||||
>>> print(C)
|
||||
Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;"
|
||||
|
||||
Each element of the Cookie also supports all of the RFC 2109
|
||||
Cookie attributes. Here's an example which sets the Path
|
||||
attribute.
|
||||
|
||||
>>> C = cookies.SimpleCookie()
|
||||
>>> C["oreo"] = "doublestuff"
|
||||
>>> C["oreo"]["path"] = "/"
|
||||
>>> print(C)
|
||||
Set-Cookie: oreo=doublestuff; Path=/
|
||||
|
||||
Each dictionary element has a 'value' attribute, which gives you
|
||||
back the value associated with the key.
|
||||
|
||||
>>> C = cookies.SimpleCookie()
|
||||
>>> C["twix"] = "none for you"
|
||||
>>> C["twix"].value
|
||||
'none for you'
|
||||
|
||||
The SimpleCookie expects that all values should be standard strings.
|
||||
Just to be sure, SimpleCookie invokes the str() builtin to convert
|
||||
the value to a string, when the values are set dictionary-style.
|
||||
|
||||
>>> C = cookies.SimpleCookie()
|
||||
>>> C["number"] = 7
|
||||
>>> C["string"] = "seven"
|
||||
>>> C["number"].value
|
||||
'7'
|
||||
>>> C["string"].value
|
||||
'seven'
|
||||
>>> C.output()
|
||||
'Set-Cookie: number=7\r\nSet-Cookie: string=seven'
|
||||
|
||||
Finis.
|
||||
"""
|
||||
|
||||
#
|
||||
# Import our required modules
|
||||
#
|
||||
import re
|
||||
import string
|
||||
|
||||
__all__ = ["CookieError", "BaseCookie", "SimpleCookie"]
|
||||
|
||||
# Bound str.join methods, named after the separator they insert.
_nulljoin = ''.join  # no separator
_semispacejoin = '; '.join  # "; " between items
_spacejoin = ' '.join  # single space between items
|
||||
|
||||
def _warn_deprecated_setter(setter):
    """Emit a DeprecationWarning naming the deprecated property setter.

    *setter* is interpolated into the message as the attribute name.
    """
    from warnings import warn
    template = ('The .%s setter is deprecated. The attribute will be read-only in '
                'future releases. Please use the set() method instead.')
    warn(template % setter, DeprecationWarning, stacklevel=3)
|
||||
|
||||
#
|
||||
# Define an exception visible to External modules
|
||||
#
|
||||
class CookieError(Exception):
    """Exception raised by this module for invalid cookie keys or attributes."""
|
||||
|
||||
|
||||
# These quoting routines conform to the RFC2109 specification, which in
|
||||
# turn references the character definitions from RFC2068. They provide
|
||||
# a two-way quoting algorithm. Any non-text character is translated
|
||||
# into a 4 character sequence: a backslash followed by the
|
||||
# three-digit octal equivalent of the character. Any '\' or '"' is
|
||||
# quoted with a preceding '\' slash.
|
||||
# Because of the way browsers really handle cookies (as opposed to what
|
||||
# the RFC says) we also encode "," and ";".
|
||||
#
|
||||
# These are taken from RFC2068 and RFC2109.
|
||||
# _LegalChars is the list of chars which don't require "'s
|
||||
# _Translator hash-table for fast quoting
|
||||
#
|
||||
# Characters that may appear in a key or an unquoted value.
_LegalChars = ''.join((string.ascii_letters, string.digits, "!#$%&'*+-.^_`|~:"))
# Characters left untouched inside a double-quoted value.
_UnescapedChars = _LegalChars + ' ()/<=>?@[]{}'

# str.translate() table: every other code point below 256 becomes a
# backslash followed by three octal digits ...
_Translator = {code: '\\%03o' % code
               for code in range(256)
               if chr(code) not in _UnescapedChars}
# ... except '"' and '\', which get a plain backslash escape.
_Translator[ord('"')] = '\\"'
_Translator[ord('\\')] = '\\\\'

# Matches only a non-empty string made entirely of _LegalChars.
_is_legal_key = re.compile('[' + re.escape(_LegalChars) + ']+').fullmatch
|
||||
|
||||
def _quote(str):
    r"""Return *str* in a form usable as a cookie header value.

    None and strings consisting solely of legal key characters are returned
    unchanged.  Anything else is wrapped in double quotes, with special
    characters escaped (with a \) via the module's translation table.
    """
    needs_quoting = str is not None and not _is_legal_key(str)
    if not needs_quoting:
        return str
    return '"%s"' % str.translate(_Translator)
|
||||
|
||||
|
||||
# Kept for backward compatibility; _unquote() no longer uses them.
_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
_QuotePatt = re.compile(r"[\\].")

# One pattern for both escape forms.  The octal alternative comes first so
# that it wins whenever both could match at the same backslash.
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub


def _unquote_replace(m):
    """Return the character that one matched escape sequence stands for."""
    octal = m.group(1)
    if octal:
        return chr(int(octal, 8))
    return m.group(2)


def _unquote(str):
    """Undo the quoting applied to a double-quoted cookie value.

    None, strings shorter than two characters, and strings not enclosed in
    double quotes are returned unchanged.  Otherwise the surrounding quotes
    are removed and escapes are decoded:

        \\012 --> \\n   (backslash + three octal digits)
        \\"   --> "     (backslash + any character except a newline)

    Decoding is a single left-to-right substitution pass, so it runs in
    time linear in the input.  The previous loop ran two regex searches per
    escape and was quadratic on inputs with many escapes.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters.  See RFC 2109.
    if str is None or len(str) < 2:
        return str
    if str[0] != '"' or str[-1] != '"':
        return str

    # Remove the "s, then decode the escapes.
    return _unquote_sub(_unquote_replace, str[1:-1])
|
||||
|
||||
# The _getdate() routine is used to set the expiration time in the cookie's HTTP
|
||||
# header. By default, _getdate() returns the current time in the appropriate
|
||||
# "expires" format for a Set-Cookie header. The one optional argument is an
|
||||
# offset from now, in seconds. For example, an offset of -3600 means "one hour
|
||||
# ago". The offset may be a floating point number.
|
||||
#
|
||||
|
||||
# Abbreviated English day names, indexed by the weekday field of gmtime().
_weekdayname = 'Mon Tue Wed Thu Fri Sat Sun'.split()

# Abbreviated English month names; index 0 is unused because months are 1-12.
_monthname = [None] + 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()

def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname):
    """Return the GMT time *future* seconds from now as a date string.

    The result has the form 'Wdy, DD Mon YYYY HH:MM:SS GMT'.  *weekdayname*
    and *monthname* supply the day and month names.
    """
    from time import gmtime, time
    year, month, day, hh, mm, ss, wd = gmtime(time() + future)[:7]
    fields = (weekdayname[wd], day, monthname[month], year, hh, mm, ss)
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % fields
|
||||
|
||||
|
||||
class Morsel(dict):
    """Hold ONE cookie (key, value) pair together with its attributes.

    The dict part maps lower-case reserved attribute names (path, domain,
    expires, ...) to their values.  The cookie's own key, its value, and the
    network (coded) form of that value are kept separately and exposed as
    the ``key``, ``value`` and ``coded_value`` properties.
    """
    # RFC 2109 lists these attributes as reserved:
    #   path       comment         domain
    #   max-age    secure      version
    #
    # For historical reasons, these attributes are also reserved:
    #   expires
    #
    # This is an extension from Microsoft:
    #   httponly
    #
    # Maps each lower-case attribute name to the traditional spelling used
    # when the attribute is written out.
    _reserved = {
        "expires": "expires",
        "path": "Path",
        "comment": "Comment",
        "domain": "Domain",
        "max-age": "Max-Age",
        "secure": "Secure",
        "httponly": "HttpOnly",
        "version": "Version",
    }

    # Attributes written as a bare name (no "=value") when truthy.
    _flags = {'secure', 'httponly'}

    def __init__(self):
        # No cookie has been stored yet.
        self._key = None
        self._value = None
        self._coded_value = None

        # Every reserved attribute starts out as the empty string.
        for attr in self._reserved:
            dict.__setitem__(self, attr, "")

    @property
    def key(self):
        return self._key

    @key.setter
    def key(self, new_key):
        _warn_deprecated_setter('key')
        self._key = new_key

    @property
    def value(self):
        return self._value

    @value.setter
    def value(self, new_value):
        _warn_deprecated_setter('value')
        self._value = new_value

    @property
    def coded_value(self):
        return self._coded_value

    @coded_value.setter
    def coded_value(self, new_coded_value):
        _warn_deprecated_setter('coded_value')
        self._coded_value = new_coded_value

    def __setitem__(self, K, V):
        """Set reserved attribute K (case-insensitive) to V."""
        attr = K.lower()
        if attr not in self._reserved:
            raise CookieError("Invalid attribute %r" % (attr,))
        dict.__setitem__(self, attr, V)

    def setdefault(self, key, val=None):
        """dict.setdefault() restricted to reserved attribute names."""
        attr = key.lower()
        if attr not in self._reserved:
            raise CookieError("Invalid attribute %r" % (attr,))
        return dict.setdefault(self, attr, val)

    def __eq__(self, morsel):
        if not isinstance(morsel, Morsel):
            return NotImplemented
        # Equal attributes AND equal value, key and coded value.
        return (dict.__eq__(self, morsel) and
                self._value == morsel._value and
                self._key == morsel._key and
                self._coded_value == morsel._coded_value)

    __ne__ = object.__ne__

    def copy(self):
        """Return a new Morsel with the same attributes and instance state."""
        duplicate = Morsel()
        dict.update(duplicate, self)
        duplicate.__dict__.update(self.__dict__)
        return duplicate

    def update(self, values):
        """Update attributes from *values*, validating every name first."""
        checked = {}
        for name, val in dict(values).items():
            name = name.lower()
            if name not in self._reserved:
                raise CookieError("Invalid attribute %r" % (name,))
            checked[name] = val
        # Only applied once every name has passed validation.
        dict.update(self, checked)

    def isReservedKey(self, K):
        """Return True if K (case-insensitive) is a reserved attribute name."""
        return K.lower() in self._reserved

    def set(self, key, val, coded_val, LegalChars=_LegalChars):
        """Store the cookie's key, value and coded value.

        Raises CookieError if *key* is a reserved attribute name or contains
        illegal characters.  *LegalChars* is deprecated and ignored.
        """
        if LegalChars != _LegalChars:
            import warnings
            warnings.warn(
                'LegalChars parameter is deprecated, ignored and will '
                'be removed in future versions.', DeprecationWarning,
                stacklevel=2)

        if key.lower() in self._reserved:
            raise CookieError('Attempt to set a reserved key %r' % (key,))
        if not _is_legal_key(key):
            raise CookieError('Illegal key %r' % (key,))

        # It's a good key, so save it.
        self._key, self._value, self._coded_value = key, val, coded_val

    def __getstate__(self):
        return {
            'key': self._key,
            'value': self._value,
            'coded_value': self._coded_value,
        }

    def __setstate__(self, state):
        self._key = state['key']
        self._value = state['value']
        self._coded_value = state['coded_value']

    def output(self, attrs=None, header="Set-Cookie:"):
        """Return *header*, a space, and the output string for this cookie."""
        return "%s %s" % (header, self.OutputString(attrs))

    __str__ = output

    def __repr__(self):
        return '<%s: %s>' % (self.__class__.__name__, self.OutputString())

    def js_output(self, attrs=None):
        """Return an HTML <script> snippet that sets this cookie."""
        # Print javascript
        return """
        <script type="text/javascript">
        <!-- begin hiding
        document.cookie = \"%s\";
        // end hiding -->
        </script>
        """ % (self.OutputString(attrs).replace('"', r'\"'))

    def OutputString(self, attrs=None):
        """Return 'key=coded_value' followed by the non-empty attributes.

        Only attributes named in *attrs* are included (all reserved ones
        when *attrs* is None).  Parts are joined with '; ' in sorted
        attribute-name order.
        """
        # First, the key=value pair
        parts = ["%s=%s" % (self.key, self.coded_value)]

        # Now add any defined attributes
        wanted = self._reserved if attrs is None else attrs
        for name in sorted(self):
            value = self[name]
            if value == "" or name not in wanted:
                continue
            label = self._reserved[name]
            if name == "expires" and isinstance(value, int):
                # An int is an offset in seconds from now.
                parts.append("%s=%s" % (label, _getdate(value)))
            elif name == "max-age" and isinstance(value, int):
                parts.append("%s=%d" % (label, value))
            elif name in self._flags:
                if value:
                    parts.append(str(label))
            else:
                parts.append("%s=%s" % (label, value))

        # Return the result
        return _semispacejoin(parts)
|
||||
|
||||
|
||||
#
|
||||
# Pattern for finding cookie
|
||||
#
|
||||
# This used to be strict parsing based on the RFC2109 and RFC2068
|
||||
# specifications. I have since discovered that MSIE 3.0x doesn't
|
||||
# follow the character rules outlined in those specs. As a
|
||||
# result, the parsing rules here are less strict.
|
||||
#
|
||||
|
||||
# Character-class bodies (without the enclosing brackets) for the pattern
# below: characters accepted in a key, and in an unquoted value.  Values
# additionally accept '[' and ']'.
_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
_LegalValueChars = _LegalKeyChars + r'\[\]'
# Matches one "key[=value]" item plus its trailing separator.  Compiled with
# re.VERBOSE, so whitespace and '#' comments inside the pattern are ignored.
# The 'val' group is None when the item has no "=value" part.
_CookiePattern = re.compile(r"""
    \s* # Optional whitespace at start of cookie
    (?P<key> # Start of group 'key'
    [""" + _LegalKeyChars + r"""]+? # Any word of at least one letter
    ) # End of group 'key'
    ( # Optional group: there may not be a value.
    \s*=\s* # Equal Sign
    (?P<val> # Start of group 'val'
    "(?:[^\\"]|\\.)*" # Any doublequoted string
    | # or
    \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
    | # or
    [""" + _LegalValueChars + r"""]* # Any word or empty string
    ) # End of group 'val'
    )? # End of optional value group
    \s* # Any number of spaces.
    (\s+|;|$) # Ending either at space, semicolon, or EOS.
    """, re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe.
|
||||
|
||||
|
||||
# At long last, here is the cookie class. Using this class is almost just like
|
||||
# using a dictionary. See this module's docstring for example usage.
|
||||
#
|
||||
class BaseCookie(dict):
    """A container class for a set of Morsels."""

    def value_decode(self, val):
        """real_value, coded_value = value_decode(STRING)

        Called before a cookie's value is set from its network
        representation; *val* is the string read from the HTTP header.
        The base implementation returns *val* for both results.
        Override this function to modify the behavior of cookies.
        """
        return val, val

    def value_encode(self, val):
        """real_value, coded_value = value_encode(VALUE)

        Called before a cookie's value is set by dictionary-style
        assignment; *val* is the value being assigned.  The base
        implementation returns ``str(val)`` for both results.
        Override this function to modify the behavior of cookies.
        """
        text = str(val)
        return text, text

    def __init__(self, input=None):
        if input:
            self.load(input)

    def __set(self, key, real_value, coded_value):
        """Private method for setting a cookie's value"""
        # Reuse the existing Morsel so its attributes survive.
        morsel = self.get(key, Morsel())
        morsel.set(key, real_value, coded_value)
        dict.__setitem__(self, key, morsel)

    def __setitem__(self, key, value):
        """Dictionary style assignment."""
        if not isinstance(value, Morsel):
            real, coded = self.value_encode(value)
            self.__set(key, real, coded)
            return
        # allow assignment of constructed Morsels (e.g. for pickling)
        dict.__setitem__(self, key, value)

    def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"):
        """Return a string suitable for HTTP."""
        lines = [morsel.output(attrs, header)
                 for _, morsel in sorted(self.items())]
        return sep.join(lines)

    __str__ = output

    def __repr__(self):
        pairs = ['%s=%s' % (name, repr(morsel.value))
                 for name, morsel in sorted(self.items())]
        return '<%s: %s>' % (self.__class__.__name__, _spacejoin(pairs))

    def js_output(self, attrs=None):
        """Return a string suitable for JavaScript."""
        return _nulljoin([morsel.js_output(attrs)
                          for _, morsel in sorted(self.items())])

    def load(self, rawdata):
        """Load cookies from a string (presumably HTTP_COOKIE) or
        from a dictionary.  Loading cookies from a dictionary 'd'
        is equivalent to calling:
            map(Cookie.__setitem__, d.keys(), d.values())
        """
        if isinstance(rawdata, str):
            self.__parse_string(rawdata)
            return
        # self.update() wouldn't call our custom __setitem__
        for name, value in rawdata.items():
            self[name] = value

    def __parse_string(self, str, patt=_CookiePattern):
        """Parse cookie string *str* and store the cookies it contains.

        The string is parsed completely before anything is stored.  If an
        invalid item is found, nothing is stored at all.
        """
        TYPE_ATTRIBUTE = 1
        TYPE_KEYVALUE = 2

        pos = 0                 # Our starting point
        length = len(str)       # Length of string
        pending = []            # Parsed (type, key, value) triples
        have_morsel = False     # A key=value pair was previously encountered

        # We first parse the whole cookie string and reject it if it's
        # syntactically invalid (this helps avoid some classes of injection
        # attacks).
        while 0 <= pos < length:
            # Start looking for a cookie
            found = patt.match(str, pos)
            if not found:
                # No more cookies
                break

            key = found.group("key")
            value = found.group("val")
            pos = found.end(0)

            if key[0] == "$":
                # We ignore attributes which pertain to the cookie
                # mechanism as a whole, such as "$Version".
                # See RFC 2965. (Does anyone care?)
                if have_morsel:
                    pending.append((TYPE_ATTRIBUTE, key[1:], value))
                continue

            lowered = key.lower()
            if lowered in Morsel._reserved:
                if not have_morsel:
                    # Invalid cookie string
                    return
                if value is not None:
                    pending.append((TYPE_ATTRIBUTE, key, _unquote(value)))
                elif lowered in Morsel._flags:
                    pending.append((TYPE_ATTRIBUTE, key, True))
                else:
                    # Invalid cookie string
                    return
            elif value is None:
                # Invalid cookie string
                return
            else:
                pending.append((TYPE_KEYVALUE, key, self.value_decode(value)))
                have_morsel = True

        # The cookie string is valid, apply it.
        current = None  # current morsel
        for kind, key, value in pending:
            if kind == TYPE_ATTRIBUTE:
                assert current is not None
                current[key] = value
            else:
                assert kind == TYPE_KEYVALUE
                real, coded = value
                self.__set(key, real, coded)
                current = self[key]
|
||||
|
||||
|
||||
class SimpleCookie(BaseCookie):
    """
    Cookie container whose values are plain strings.

    A value assigned with the dictionary notation is converted with the
    builtin str(), and its coded form is the quoted string.  A value
    received from HTTP keeps the received text as its coded form, and its
    real value is that text unquoted.
    """
    def value_decode(self, val):
        real = _unquote(val)
        return real, val

    def value_encode(self, val):
        text = str(val)
        return text, _quote(text)
|
||||
1211
Lib/http/server.py
Normal file
1211
Lib/http/server.py
Normal file
File diff suppressed because it is too large
Load Diff
597
Lib/mimetypes.py
vendored
Normal file
597
Lib/mimetypes.py
vendored
Normal file
@@ -0,0 +1,597 @@
|
||||
"""Guess the MIME type of a file.
|
||||
|
||||
This module defines two useful functions:
|
||||
|
||||
guess_type(url, strict=True) -- guess the MIME type and encoding of a URL.
|
||||
|
||||
guess_extension(type, strict=True) -- guess the extension for a given MIME type.
|
||||
|
||||
It also contains the following, for tuning the behavior:
|
||||
|
||||
Data:
|
||||
|
||||
knownfiles -- list of files to parse
|
||||
inited -- flag set when init() has been called
|
||||
suffix_map -- dictionary mapping suffixes to suffixes
|
||||
encodings_map -- dictionary mapping suffixes to encodings
|
||||
types_map -- dictionary mapping suffixes to types
|
||||
|
||||
Functions:
|
||||
|
||||
init([files]) -- parse a list of files, default knownfiles (on Windows, the
|
||||
default values are taken from the registry)
|
||||
read_mime_types(file) -- parse one file, return a dictionary or None
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import posixpath
|
||||
import urllib.parse
|
||||
try:
|
||||
import winreg as _winreg
|
||||
except ImportError:
|
||||
_winreg = None
|
||||
|
||||
# Public names of this module.
__all__ = [
    "knownfiles", "inited", "MimeTypes",
    "guess_type", "guess_all_extensions", "guess_extension",
    "add_type", "init", "read_mime_types",
    "suffix_map", "encodings_map", "types_map", "common_types"
]

# Candidate mime.types files.  init() reads, in this order, every entry
# that exists as a regular file when it is called without arguments.
knownfiles = [
    "/etc/mime.types",
    "/etc/httpd/mime.types", # Mac OS X
    "/etc/httpd/conf/mime.types", # Apache
    "/etc/apache/mime.types", # Apache 1
    "/etc/apache2/mime.types", # Apache 2
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    # NOTE(review): same path as two entries above, so the file is read twice.
    "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2
    "/usr/local/etc/mime.types", # Apache 1.3
]

# Set to True by init(); MimeTypes.__init__() calls init() while it is False.
inited = False
# Shared MimeTypes instance behind the module-level helpers; assigned by init().
_db = None
|
||||
|
||||
|
||||
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.

    Instance attributes:

    encodings_map -- dict mapping suffixes to encoding names
    suffix_map -- dict mapping suffixes to replacement suffixes
    types_map -- pair of dicts (non-strict, strict) mapping suffix to type
    types_map_inv -- pair of dicts (non-strict, strict) mapping type to
                     the list of its suffixes, in insertion order
    """

    def __init__(self, filenames=(), strict=True):
        """Start from the module-level default tables, then read `filenames`.

        Each file in `filenames` is parsed with read(name, strict).
        """
        # The module-level tables must exist before they can be copied.
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = ({}, {}) # dict for (non-strict, strict)
        self.types_map_inv = ({}, {})
        for (ext, type) in types_map.items():
            self.add_type(type, ext, True)
        for (ext, type) in common_types.items():
            self.add_type(type, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        # `strict` doubles as the tuple index: False -> 0, True -> 1.
        self.types_map[strict][ext] = type
        exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in exts:
            exts.append(ext)

    def guess_type(self, url, strict=True):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional `strict' argument when False adds a bunch of commonly found,
        but non-standard types.
        """
        scheme, url = urllib.parse.splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            if semi >= 0:
                type = url[:semi]
            else:
                type = url[:comma]
            if '=' in type or '/' not in type:
                type = 'text/plain'
            return type, None # never compressed, so encoding is None
        base, ext = posixpath.splitext(url)
        # Expand shorthand suffixes (e.g. '.tgz' -> '.tar.gz') and re-split.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        # Peel off one encoding suffix; the suffix before it names the type.
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None
        types_map = self.types_map[True]
        if ext in types_map:
            return types_map[ext], encoding
        elif ext.lower() in types_map:
            return types_map[ext.lower()], encoding
        elif strict:
            return None, encoding
        # Lenient mode only: fall back to the non-standard table.
        types_map = self.types_map[False]
        if ext in types_map:
            return types_map[ext], encoding
        elif ext.lower() in types_map:
            return types_map[ext.lower()], encoding
        else:
            return None, encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type `type' by guess_type().
        An empty list is returned when no extension is known.  The list is
        a fresh copy, so callers may modify it freely.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        # Copy the list: appending the non-strict extensions below must not
        # modify the strict table stored in self.types_map_inv.
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        if not extensions:
            return None
        return extensions[0]

    def read(self, filename, strict=True):
        """
        Read a single mime.types-format file, specified by pathname.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        with open(filename, encoding='utf-8') as fp:
            self.readfp(fp, strict)

    def readfp(self, fp, strict=True):
        """
        Read a single mime.types-format file.

        `fp' only needs a readline() method returning str.  Each line is
        "type suffix [suffix ...]"; a word starting with '#' begins a
        comment that runs to the end of the line.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while 1:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            # Drop the first comment word and everything after it.
            for i, word in enumerate(words):
                if word[0] == '#':
                    del words[i:]
                    break
            if not words:
                continue
            type, suffixes = words[0], words[1:]
            for suff in suffixes:
                self.add_type(type, '.' + suff, strict)

    def read_windows_registry(self, strict=True):
        """
        Load the MIME types database from Windows registry.

        Does nothing when the winreg module is unavailable.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """

        # Windows only
        if not _winreg:
            return

        def enum_types(mimedb):
            # Yield the subkey names of `mimedb' until EnumKey fails,
            # skipping names that contain a NUL character.
            i = 0
            while True:
                try:
                    ctype = _winreg.EnumKey(mimedb, i)
                except EnvironmentError:
                    break
                else:
                    if '\0' not in ctype:
                        yield ctype
                i += 1

        with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr:
            for subkeyname in enum_types(hkcr):
                try:
                    with _winreg.OpenKey(hkcr, subkeyname) as subkey:
                        # Only check file extensions
                        if not subkeyname.startswith("."):
                            continue
                        # raises EnvironmentError if no 'Content Type' value
                        mimetype, datatype = _winreg.QueryValueEx(
                            subkey, 'Content Type')
                        if datatype != _winreg.REG_SZ:
                            continue
                        self.add_type(mimetype, subkeyname, strict)
                except EnvironmentError:
                    continue
|
||||
|
||||
def guess_type(url, strict=True):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    Optional `strict' argument when false adds a bunch of commonly found, but
    non-standard types.
    """
    # Build the shared module-level database on first use.
    if _db is None:
        init()
    return _db.guess_type(url, strict)
|
||||
|
||||
|
||||
def guess_all_extensions(type, strict=True):
    """Guess the extensions for a file based on its MIME type.

    Return value is a list of strings giving the possible filename
    extensions, including the leading dot ('.').  The extension is not
    guaranteed to have been associated with any particular data
    stream, but would be mapped to the MIME type `type' by
    guess_type().  If no extension can be guessed for `type', the
    list is empty.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # Build the shared module-level database on first use.
    if _db is None:
        init()
    return _db.guess_all_extensions(type, strict)
|
||||
|
||||
def guess_extension(type, strict=True):
    """Guess the extension for a file based on its MIME type.

    Return value is a string giving a filename extension, including the
    leading dot ('.').  The extension is not guaranteed to have been
    associated with any particular data stream, but would be mapped to the
    MIME type `type' by guess_type().  If no extension can be guessed for
    `type', None is returned.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # Build the shared module-level database on first use.
    if _db is None:
        init()
    return _db.guess_extension(type, strict)
|
||||
|
||||
def add_type(type, ext, strict=True):
    """Add a mapping between a type and an extension.

    When the extension is already known, the new
    type will replace the old one. When the type
    is already known the extension will be added
    to the list of known extensions.

    If strict is true, information will be added to
    list of standard types, else to the list of non-standard
    types.
    """
    # Build the shared module-level database on first use.
    if _db is None:
        init()
    return _db.add_type(type, ext, strict)
|
||||
|
||||
|
||||
def init(files=None):
    """Rebuild the module-level MIME database and publish its tables.

    A fresh MimeTypes instance is created.  When `files` is None, the
    Windows registry is consulted (if winreg is available) and
    `knownfiles` is used as the file list.  Every entry of the file list
    that is an existing regular file is then read.  Finally the module
    globals encodings_map, suffix_map, types_map and common_types are
    rebound to the new instance's tables and the instance becomes `_db`.
    """
    global suffix_map, types_map, encodings_map, common_types
    global inited, _db

    # Must be set first so that MimeTypes.__init__() doesn't call us again.
    inited = True
    fresh = MimeTypes()

    if files is None:
        if _winreg:
            fresh.read_windows_registry()
        files = knownfiles

    # filter() is lazy, so each path is checked right before it is read.
    for path in filter(os.path.isfile, files):
        fresh.read(path)

    encodings_map, suffix_map = fresh.encodings_map, fresh.suffix_map
    # MimeTypes.types_map is the pair (non-strict, strict).
    common_types, types_map = fresh.types_map

    # Publish the database only now that it is fully initialized.
    _db = fresh
|
||||
|
||||
|
||||
def read_mime_types(file):
    """Parse one mime.types-format file and return a suffix-to-type dict.

    `file` is the pathname of the file to read.  Returns None when the
    file cannot be opened (OSError).  Otherwise returns the strict
    types map of a new MimeTypes instance after the file has been read
    into it, so the result also contains the default strict types that
    every MimeTypes instance starts with.
    """
    try:
        # Decode as UTF-8, like MimeTypes.read(), so the result does not
        # depend on the locale's default encoding.
        f = open(file, encoding='utf-8')
    except OSError:
        return None
    with f:
        db = MimeTypes()
        db.readfp(f, True)
        return db.types_map[True]
|
||||
|
||||
|
||||
def _default_mime_types():
    """Assign the built-in default tables to the module globals.

    Rebinds suffix_map, encodings_map, types_map and common_types to
    freshly built dictionaries.
    """
    global suffix_map
    global encodings_map
    global types_map
    global common_types

    # Shorthand suffix -> the suffix chain it stands for.
    suffix_map = {
        '.svgz': '.svg.gz',
        '.tgz': '.tar.gz',
        '.taz': '.tar.gz',
        '.tz': '.tar.gz',
        '.tbz2': '.tar.bz2',
        '.txz': '.tar.xz',
        }

    # Suffix -> name of the encoding it indicates.
    encodings_map = {
        '.gz': 'gzip',
        '.Z': 'compress',
        '.bz2': 'bzip2',
        '.xz': 'xz',
        }

    # Before adding new types, make sure they are either registered with IANA,
    # at http://www.iana.org/assignments/media-types
    # or extensions, i.e. using the x- prefix

    # If you add to these, please keep them sorted!
    types_map = {
        '.a' : 'application/octet-stream',
        '.ai' : 'application/postscript',
        '.aif' : 'audio/x-aiff',
        '.aifc' : 'audio/x-aiff',
        '.aiff' : 'audio/x-aiff',
        '.au' : 'audio/basic',
        '.avi' : 'video/x-msvideo',
        '.bat' : 'text/plain',
        '.bcpio' : 'application/x-bcpio',
        '.bin' : 'application/octet-stream',
        '.bmp' : 'image/x-ms-bmp',
        '.c' : 'text/plain',
        # Duplicate key: in a dict literal the later '.cdf' entry wins, so
        # '.cdf' ends up mapped to 'application/x-netcdf'.
        '.cdf' : 'application/x-cdf',
        '.cdf' : 'application/x-netcdf',
        '.cpio' : 'application/x-cpio',
        '.csh' : 'application/x-csh',
        '.css' : 'text/css',
        '.csv' : 'text/csv',
        '.dll' : 'application/octet-stream',
        '.doc' : 'application/msword',
        '.dot' : 'application/msword',
        '.dvi' : 'application/x-dvi',
        '.eml' : 'message/rfc822',
        '.eps' : 'application/postscript',
        '.etx' : 'text/x-setext',
        '.exe' : 'application/octet-stream',
        '.gif' : 'image/gif',
        '.gtar' : 'application/x-gtar',
        '.h' : 'text/plain',
        '.hdf' : 'application/x-hdf',
        '.htm' : 'text/html',
        '.html' : 'text/html',
        '.ico' : 'image/vnd.microsoft.icon',
        '.ief' : 'image/ief',
        '.jpe' : 'image/jpeg',
        '.jpeg' : 'image/jpeg',
        '.jpg' : 'image/jpeg',
        '.js' : 'application/javascript',
        '.ksh' : 'text/plain',
        '.latex' : 'application/x-latex',
        '.m1v' : 'video/mpeg',
        '.m3u' : 'application/vnd.apple.mpegurl',
        '.m3u8' : 'application/vnd.apple.mpegurl',
        '.man' : 'application/x-troff-man',
        '.me' : 'application/x-troff-me',
        '.mht' : 'message/rfc822',
        '.mhtml' : 'message/rfc822',
        '.mif' : 'application/x-mif',
        '.mov' : 'video/quicktime',
        '.movie' : 'video/x-sgi-movie',
        '.mp2' : 'audio/mpeg',
        '.mp3' : 'audio/mpeg',
        '.mp4' : 'video/mp4',
        '.mpa' : 'video/mpeg',
        '.mpe' : 'video/mpeg',
        '.mpeg' : 'video/mpeg',
        '.mpg' : 'video/mpeg',
        '.ms' : 'application/x-troff-ms',
        '.nc' : 'application/x-netcdf',
        '.nws' : 'message/rfc822',
        '.o' : 'application/octet-stream',
        '.obj' : 'application/octet-stream',
        '.oda' : 'application/oda',
        '.p12' : 'application/x-pkcs12',
        '.p7c' : 'application/pkcs7-mime',
        '.pbm' : 'image/x-portable-bitmap',
        '.pdf' : 'application/pdf',
        '.pfx' : 'application/x-pkcs12',
        '.pgm' : 'image/x-portable-graymap',
        '.pl' : 'text/plain',
        '.png' : 'image/png',
        '.pnm' : 'image/x-portable-anymap',
        '.pot' : 'application/vnd.ms-powerpoint',
        '.ppa' : 'application/vnd.ms-powerpoint',
        '.ppm' : 'image/x-portable-pixmap',
        '.pps' : 'application/vnd.ms-powerpoint',
        '.ppt' : 'application/vnd.ms-powerpoint',
        '.ps' : 'application/postscript',
        '.pwz' : 'application/vnd.ms-powerpoint',
        '.py' : 'text/x-python',
        '.pyc' : 'application/x-python-code',
        '.pyo' : 'application/x-python-code',
        '.qt' : 'video/quicktime',
        '.ra' : 'audio/x-pn-realaudio',
        '.ram' : 'application/x-pn-realaudio',
        '.ras' : 'image/x-cmu-raster',
        '.rdf' : 'application/xml',
        '.rgb' : 'image/x-rgb',
        '.roff' : 'application/x-troff',
        '.rtx' : 'text/richtext',
        '.sgm' : 'text/x-sgml',
        '.sgml' : 'text/x-sgml',
        '.sh' : 'application/x-sh',
        '.shar' : 'application/x-shar',
        '.snd' : 'audio/basic',
        '.so' : 'application/octet-stream',
        '.src' : 'application/x-wais-source',
        '.sv4cpio': 'application/x-sv4cpio',
        '.sv4crc' : 'application/x-sv4crc',
        '.svg' : 'image/svg+xml',
        '.swf' : 'application/x-shockwave-flash',
        '.t' : 'application/x-troff',
        '.tar' : 'application/x-tar',
        '.tcl' : 'application/x-tcl',
        '.tex' : 'application/x-tex',
        '.texi' : 'application/x-texinfo',
        '.texinfo': 'application/x-texinfo',
        '.tif' : 'image/tiff',
        '.tiff' : 'image/tiff',
        '.tr' : 'application/x-troff',
        '.tsv' : 'text/tab-separated-values',
        '.txt' : 'text/plain',
        '.ustar' : 'application/x-ustar',
        '.vcf' : 'text/x-vcard',
        '.wav' : 'audio/x-wav',
        '.webm' : 'video/webm',
        '.wiz' : 'application/msword',
        '.wsdl' : 'application/xml',
        '.xbm' : 'image/x-xbitmap',
        '.xlb' : 'application/vnd.ms-excel',
        # Duplicate key: the later '.xls' entry wins, so '.xls' ends up
        # mapped to 'application/vnd.ms-excel'.
        '.xls' : 'application/excel',
        '.xls' : 'application/vnd.ms-excel',
        '.xml' : 'text/xml',
        '.xpdl' : 'application/xml',
        '.xpm' : 'image/x-xpixmap',
        '.xsl' : 'application/xml',
        '.xwd' : 'image/x-xwindowdump',
        '.zip' : 'application/zip',
        }

    # These are non-standard types, commonly found in the wild.  They will
    # only match if strict=0 flag is given to the API methods.

    # Please sort these too
    common_types = {
        '.jpg' : 'image/jpg',
        '.mid' : 'audio/midi',
        '.midi': 'audio/midi',
        '.pct' : 'image/pict',
        '.pic' : 'image/pict',
        '.pict': 'image/pict',
        '.rtf' : 'application/rtf',
        '.xul' : 'text/xul'
        }
|
||||
|
||||
|
||||
_default_mime_types()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line front end: guess a type (or, with -e, an extension)
    # for every positional argument.
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
--help / -h -- print this message and exit
--lenient / -l -- additionally search of some common, but non-standard
types.
--extension / -e -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        """Print the usage text, then `msg` if it is truthy, and exit with `code`."""
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'hle', ['help', 'lenient', 'extension'])
    except getopt.error as msg:
        usage(1, msg)

    # Defaults: strict lookup, guess the type rather than the extension.
    strict = 1
    extension = 0
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = 0
        elif opt in ('-e', '--extension'):
            extension = 1

    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if guess:
                print(guess)
            else:
                print("I don't know anything about type", gtype)
        else:
            guess, encoding = guess_type(gtype, strict)
            if guess:
                print('type:', guess, 'encoding:', encoding)
            else:
                print("I don't know anything about type", gtype)
|
||||
795
Lib/socketserver.py
vendored
Normal file
795
Lib/socketserver.py
vendored
Normal file
@@ -0,0 +1,795 @@
|
||||
"""Generic socket server classes.
|
||||
|
||||
This module tries to capture the various aspects of defining a server:
|
||||
|
||||
For socket-based servers:
|
||||
|
||||
- address family:
|
||||
- AF_INET{,6}: IP (Internet Protocol) sockets (default)
|
||||
- AF_UNIX: Unix domain sockets
|
||||
- others, e.g. AF_DECNET are conceivable (see <socket.h>)
|
||||
- socket type:
|
||||
- SOCK_STREAM (reliable stream, e.g. TCP)
|
||||
- SOCK_DGRAM (datagrams, e.g. UDP)
|
||||
|
||||
For request-based servers (including socket-based):
|
||||
|
||||
- client address verification before further looking at the request
|
||||
(This is actually a hook for any processing that needs to look
|
||||
at the request before anything else, e.g. logging)
|
||||
- how to handle multiple requests:
|
||||
- synchronous (one request is handled at a time)
|
||||
- forking (each request is handled by a new process)
|
||||
- threading (each request is handled by a new thread)
|
||||
|
||||
The classes in this module favor the server type that is simplest to
|
||||
write: a synchronous TCP/IP server. This is bad class design, but
|
||||
saves some typing. (There's also the issue that a deep class hierarchy
|
||||
slows down method lookups.)
|
||||
|
||||
There are five classes in an inheritance diagram, four of which represent
|
||||
synchronous servers of four types:
|
||||
|
||||
+------------+
|
||||
| BaseServer |
|
||||
+------------+
|
||||
|
|
||||
v
|
||||
+-----------+ +------------------+
|
||||
| TCPServer |------->| UnixStreamServer |
|
||||
+-----------+ +------------------+
|
||||
|
|
||||
v
|
||||
+-----------+ +--------------------+
|
||||
| UDPServer |------->| UnixDatagramServer |
|
||||
+-----------+ +--------------------+
|
||||
|
||||
Note that UnixDatagramServer derives from UDPServer, not from
|
||||
UnixStreamServer -- the only difference between an IP and a Unix
|
||||
stream server is the address family, which is simply repeated in both
|
||||
unix server classes.
|
||||
|
||||
Forking and threading versions of each type of server can be created
|
||||
using the ForkingMixIn and ThreadingMixIn mix-in classes. For
|
||||
instance, a threading UDP server class is created as follows:
|
||||
|
||||
class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass
|
||||
|
||||
The Mix-in class must come first, since it overrides a method defined
|
||||
in UDPServer! Setting the various member variables also changes
|
||||
the behavior of the underlying server mechanism.
|
||||
|
||||
To implement a service, you must derive a class from
|
||||
BaseRequestHandler and redefine its handle() method. You can then run
|
||||
various versions of the service by combining one of the server classes
|
||||
with your request handler class.
|
||||
|
||||
The request handler class must be different for datagram or stream
|
||||
services. This can be hidden by using the request handler
|
||||
subclasses StreamRequestHandler or DatagramRequestHandler.
|
||||
|
||||
Of course, you still have to use your head!
|
||||
|
||||
For instance, it makes no sense to use a forking server if the service
|
||||
contains state in memory that can be modified by requests (since the
|
||||
modifications in the child process would never reach the initial state
|
||||
kept in the parent process and passed to each child). In this case,
|
||||
you can use a threading server, but you will probably have to use
|
||||
locks to avoid two requests that come in nearly simultaneously to apply
|
||||
conflicting changes to the server state.
|
||||
|
||||
On the other hand, if you are building e.g. an HTTP server, where all
|
||||
data is stored externally (e.g. in the file system), a synchronous
|
||||
class will essentially render the service "deaf" while one request is
|
||||
being handled -- which may be for a very long time if a client is slow
|
||||
to read all the data it has requested. Here a threading or forking
|
||||
server is appropriate.
|
||||
|
||||
In some cases, it may be appropriate to process part of a request
|
||||
synchronously, but to finish processing in a forked child depending on
|
||||
the request data. This can be implemented by using a synchronous
|
||||
server and doing an explicit fork in the request handler class
|
||||
handle() method.
|
||||
|
||||
Another approach to handling multiple simultaneous requests in an
|
||||
environment that supports neither threads nor fork (or where these are
|
||||
too expensive or inappropriate for the service) is to maintain an
|
||||
explicit table of partially finished requests and to use a selector to
|
||||
decide which request to work on next (or whether to handle a new
|
||||
incoming request). This is particularly important for stream services
|
||||
where each client can potentially be connected for a long time (if
|
||||
threads or subprocesses cannot be used).
|
||||
|
||||
Future work:
|
||||
- Standard classes for Sun RPC (which uses either UDP or TCP)
|
||||
- Standard mix-in classes to implement various authentication
|
||||
and encryption schemes
|
||||
|
||||
XXX Open problems:
|
||||
- What to do with out-of-band data?
|
||||
|
||||
BaseServer:
|
||||
- split generic "request" functionality out into BaseServer class.
|
||||
Copyright (C) 2000 Luke Kenneth Casson Leighton <lkcl@samba.org>
|
||||
|
||||
example: read entries from a SQL database (requires overriding
|
||||
get_request() to return a table entry from the database).
|
||||
entry is processed by a RequestHandlerClass.
|
||||
|
||||
"""
|
||||
|
||||
# Author of the BaseServer patch: Luke Kenneth Casson Leighton
|
||||
|
||||
__version__ = "0.4"
|
||||
|
||||
|
||||
import socket
|
||||
import selectors
|
||||
import os
|
||||
import errno
|
||||
import sys
|
||||
try:
|
||||
import threading
|
||||
except ImportError:
|
||||
import dummy_threading as threading
|
||||
from io import BufferedIOBase
|
||||
from time import monotonic as time
|
||||
|
||||
# Names that are always exported.
__all__ = [
    "BaseServer", "TCPServer", "UDPServer",
    "ThreadingUDPServer", "ThreadingTCPServer",
    "BaseRequestHandler", "StreamRequestHandler",
    "DatagramRequestHandler", "ThreadingMixIn",
]
# Exported only when os provides fork().
if hasattr(os, "fork"):
    __all__ += ["ForkingUDPServer", "ForkingTCPServer", "ForkingMixIn"]
# Exported only when socket provides AF_UNIX.
if hasattr(socket, "AF_UNIX"):
    __all__ += [
        "UnixStreamServer", "UnixDatagramServer",
        "ThreadingUnixStreamServer",
        "ThreadingUnixDatagramServer",
    ]

# poll/select have the advantage of not requiring any extra file descriptor,
# contrarily to epoll/kqueue (also, they require a single syscall).
# Use PollSelector when the selectors module has it, else SelectSelector.
_ServerSelector = getattr(selectors, 'PollSelector', selectors.SelectSelector)
|
||||
|
||||
|
||||
class BaseServer:
|
||||
|
||||
"""Base class for server classes.
|
||||
|
||||
Methods for the caller:
|
||||
|
||||
- __init__(server_address, RequestHandlerClass)
|
||||
- serve_forever(poll_interval=0.5)
|
||||
- shutdown()
|
||||
- handle_request() # if you do not use serve_forever()
|
||||
- fileno() -> int # for selector
|
||||
|
||||
Methods that may be overridden:
|
||||
|
||||
- server_bind()
|
||||
- server_activate()
|
||||
- get_request() -> request, client_address
|
||||
- handle_timeout()
|
||||
- verify_request(request, client_address)
|
||||
- server_close()
|
||||
- process_request(request, client_address)
|
||||
- shutdown_request(request)
|
||||
- close_request(request)
|
||||
- service_actions()
|
||||
- handle_error()
|
||||
|
||||
Methods for derived classes:
|
||||
|
||||
- finish_request(request, client_address)
|
||||
|
||||
Class variables that may be overridden by derived classes or
|
||||
instances:
|
||||
|
||||
- timeout
|
||||
- address_family
|
||||
- socket_type
|
||||
- allow_reuse_address
|
||||
|
||||
Instance variables:
|
||||
|
||||
- RequestHandlerClass
|
||||
- socket
|
||||
|
||||
"""
|
||||
|
||||
timeout = None
|
||||
|
||||
def __init__(self, server_address, RequestHandlerClass):
|
||||
"""Constructor. May be extended, do not override."""
|
||||
self.server_address = server_address
|
||||
self.RequestHandlerClass = RequestHandlerClass
|
||||
self.__is_shut_down = threading.Event()
|
||||
self.__shutdown_request = False
|
||||
|
||||
def server_activate(self):
|
||||
"""Called by constructor to activate the server.
|
||||
|
||||
May be overridden.
|
||||
|
||||
"""
|
||||
pass
|
||||
|
||||
def serve_forever(self, poll_interval=0.5):
|
||||
"""Handle one request at a time until shutdown.
|
||||
|
||||
Polls for shutdown every poll_interval seconds. Ignores
|
||||
self.timeout. If you need to do periodic tasks, do them in
|
||||
another thread.
|
||||
"""
|
||||
self.__is_shut_down.clear()
|
||||
try:
|
||||
# XXX: Consider using another file descriptor or connecting to the
|
||||
# socket to wake this up instead of polling. Polling reduces our
|
||||
# responsiveness to a shutdown request and wastes cpu at all other
|
||||
# times.
|
||||
with _ServerSelector() as selector:
|
||||
selector.register(self, selectors.EVENT_READ)
|
||||
|
||||
while not self.__shutdown_request:
|
||||
ready = selector.select(poll_interval)
|
||||
if ready:
|
||||
self._handle_request_noblock()
|
||||
|
||||
self.service_actions()
|
||||
finally:
|
||||
self.__shutdown_request = False
|
||||
self.__is_shut_down.set()
|
||||
|
||||
def shutdown(self):
|
||||
"""Stops the serve_forever loop.
|
||||
|
||||
Blocks until the loop has finished. This must be called while
|
||||
serve_forever() is running in another thread, or it will
|
||||
deadlock.
|
||||
"""
|
||||
self.__shutdown_request = True
|
||||
self.__is_shut_down.wait()
|
||||
|
||||
def service_actions(self):
|
||||
"""Called by the serve_forever() loop.
|
||||
|
||||
May be overridden by a subclass / Mixin to implement any code that
|
||||
needs to be run during the loop.
|
||||
"""
|
||||
pass
|
||||
|
||||
# The distinction between handling, getting, processing and finishing a
|
||||
# request is fairly arbitrary. Remember:
|
||||
#
|
||||
# - handle_request() is the top-level call. It calls selector.select(),
|
||||
# get_request(), verify_request() and process_request()
|
||||
# - get_request() is different for stream or datagram sockets
|
||||
# - process_request() is the place that may fork a new process or create a
|
||||
# new thread to finish the request
|
||||
# - finish_request() instantiates the request handler class; this
|
||||
# constructor will handle the request all by itself
|
||||
|
||||
def handle_request(self):
    """Handle a single request, blocking for at most the timeout.

    The effective timeout is self.timeout combined with any timeout
    set directly on the socket (the smaller one wins when both are
    set).  When it expires without a request, handle_timeout() is
    called and its result returned.
    """
    # Honour socket.settimeout(), which people used to escape
    # handle_request before self.timeout existed.
    sock_timeout = self.socket.gettimeout()
    if sock_timeout is None:
        timeout = self.timeout
    elif self.timeout is None:
        timeout = sock_timeout
    else:
        timeout = min(sock_timeout, self.timeout)
    if timeout is not None:
        deadline = time() + timeout

    # select() may wake up early (EINTR), hence the loop: keep waiting
    # until a request shows up or the deadline has passed.
    with _ServerSelector() as selector:
        selector.register(self, selectors.EVENT_READ)

        while True:
            if selector.select(timeout):
                return self._handle_request_noblock()
            if timeout is None:
                continue
            timeout = deadline - time()
            if timeout < 0:
                return self.handle_timeout()
|
||||
|
||||
def _handle_request_noblock(self):
|
||||
"""Handle one request, without blocking.
|
||||
|
||||
I assume that selector.select() has returned that the socket is
|
||||
readable before this function was called, so there should be no risk of
|
||||
blocking in get_request().
|
||||
"""
|
||||
try:
|
||||
request, client_address = self.get_request()
|
||||
except OSError:
|
||||
return
|
||||
if self.verify_request(request, client_address):
|
||||
try:
|
||||
self.process_request(request, client_address)
|
||||
except Exception:
|
||||
self.handle_error(request, client_address)
|
||||
self.shutdown_request(request)
|
||||
except:
|
||||
self.shutdown_request(request)
|
||||
raise
|
||||
else:
|
||||
self.shutdown_request(request)
|
||||
|
||||
def handle_timeout(self):
    """Hook called when handle_request() times out without a request.

    Does nothing here; ForkingMixIn overrides it.
    """
    pass
|
||||
|
||||
def verify_request(self, request, client_address):
    """Decide whether a request should be processed.  May be overridden.

    Return True to go ahead with the request; the default accepts
    every request.
    """
    return True
|
||||
|
||||
def process_request(self, request, client_address):
    """Run finish_request() and then shut the request down.

    ForkingMixIn and ThreadingMixIn override this method.
    """
    self.finish_request(request, client_address)
    self.shutdown_request(request)
|
||||
|
||||
def server_close(self):
    """Release the server's resources.  May be overridden.

    The base implementation has nothing to clean up.
    """
    pass
|
||||
|
||||
def finish_request(self, request, client_address):
    """Serve one request by instantiating RequestHandlerClass."""
    # The instance is not kept; constructing it is what does the work.
    self.RequestHandlerClass(request, client_address, self)
|
||||
|
||||
def shutdown_request(self, request):
    """Shut down and close one request; here this only closes it."""
    self.close_request(request)
|
||||
|
||||
def close_request(self, request):
    """Clean up one request; nothing to do in the base class."""
    pass
|
||||
|
||||
def handle_error(self, request, client_address):
    """Report a failed request and carry on.  May be overridden.

    Writes the client address and the traceback of the exception
    currently being handled to standard error, framed by two rules of
    dashes.
    """
    rule = '-'*40
    print(rule, file=sys.stderr)
    print('Exception happened during processing of request from',
          client_address, file=sys.stderr)
    import traceback
    traceback.print_exc()
    print(rule, file=sys.stderr)
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, *args):
|
||||
self.server_close()
|
||||
|
||||
|
||||
class TCPServer(BaseServer):

    """Base class for the socket-based servers.

    Out of the box this is a synchronous IP stream (i.e. TCP) server.

    Methods for the caller:

    - __init__(server_address, RequestHandlerClass, bind_and_activate=True)
    - serve_forever(poll_interval=0.5)
    - shutdown()
    - handle_request()  # if you don't use serve_forever()
    - fileno() -> int   # for selector

    Methods that may be overridden:

    - server_bind()
    - server_activate()
    - get_request() -> request, client_address
    - handle_timeout()
    - verify_request(request, client_address)
    - process_request(request, client_address)
    - shutdown_request(request)
    - close_request(request)
    - handle_error()

    Methods for derived classes:

    - finish_request(request, client_address)

    Class variables that may be overridden by derived classes or
    instances:

    - timeout
    - address_family
    - socket_type
    - request_queue_size (only for stream sockets)
    - allow_reuse_address

    Instance variables:

    - server_address
    - RequestHandlerClass
    - socket

    """

    address_family = socket.AF_INET

    socket_type = socket.SOCK_STREAM

    request_queue_size = 5

    allow_reuse_address = False

    def __init__(self, server_address, RequestHandlerClass, bind_and_activate=True):
        """Create the server socket.  May be extended, do not override.

        With bind_and_activate true, server_bind() and
        server_activate() run immediately; if either raises,
        server_close() is called before the exception propagates.
        """
        BaseServer.__init__(self, server_address, RequestHandlerClass)
        self.socket = socket.socket(self.address_family, self.socket_type)
        if not bind_and_activate:
            return
        try:
            self.server_bind()
            self.server_activate()
        except BaseException:
            self.server_close()
            raise

    def server_bind(self):
        """Bind the socket; called by the constructor.  May be overridden.

        Sets SO_REUSEADDR first when allow_reuse_address is true, and
        afterwards replaces server_address with the address the socket
        reports via getsockname().
        """
        sock = self.socket
        if self.allow_reuse_address:
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.bind(self.server_address)
        self.server_address = sock.getsockname()

    def server_activate(self):
        """Start listening; called by the constructor.  May be overridden.

        The backlog passed to listen() is request_queue_size.
        """
        self.socket.listen(self.request_queue_size)

    def server_close(self):
        """Clean up the server by closing its socket.  May be overridden."""
        self.socket.close()

    def fileno(self):
        """Return the socket's file number (interface required by selector)."""
        return self.socket.fileno()

    def get_request(self):
        """Accept a connection and return (request, client_address).

        May be overridden.
        """
        return self.socket.accept()

    def shutdown_request(self, request):
        """Shut down the write side of a request, then close it."""
        try:
            # Shut down explicitly: socket.close() merely releases the
            # socket and waits for GC to perform the actual close.
            request.shutdown(socket.SHUT_WR)
        except OSError:
            # Some platforms may raise ENOTCONN here.
            pass
        self.close_request(request)

    def close_request(self, request):
        """Clean up an individual request by closing it."""
        request.close()
|
||||
|
||||
|
||||
class UDPServer(TCPServer):

    """Datagram (UDP) flavour of the socket server."""

    allow_reuse_address = False

    socket_type = socket.SOCK_DGRAM

    # Upper bound, in bytes, handed to recvfrom() for one datagram.
    max_packet_size = 8192

    def get_request(self):
        """Receive one datagram.

        Returns ((data, socket), client_address): the request is the
        payload paired with the server socket.
        """
        payload, sender = self.socket.recvfrom(self.max_packet_size)
        return (payload, self.socket), sender

    def server_activate(self):
        """Do nothing: there is no need to call listen() for UDP."""
        pass

    def shutdown_request(self, request):
        """Close the request; there is nothing to shut down first."""
        self.close_request(request)

    def close_request(self, request):
        """Do nothing: there is nothing to close."""
        pass
|
||||
|
||||
if hasattr(os, "fork"):
    class ForkingMixIn:
        """Mix-in class that serves each request in a forked child process."""

        timeout = 300
        # Set of pids of forked children; created on the first fork.
        active_children = None
        max_children = 40

        def collect_children(self):
            """Internal routine that reaps children which have exited."""
            children = self.active_children
            if children is None:
                return

            # While at or above max_children, block and reap until we
            # are back under the threshold.  waitpid(-1) needs only
            # size(<defunct children>) syscalls rather than
            # size(<children>); the downside is that it may reap
            # children we did not spawn, which is why it is only used
            # when we are above the limit.
            while len(children) >= self.max_children:
                try:
                    reaped, _ = os.waitpid(-1, 0)
                    children.discard(reaped)
                except ChildProcessError:
                    # No children left at all, so we are done.
                    children.clear()
                except OSError:
                    break

            # Now reap every tracked child that is already defunct.
            for child in children.copy():
                try:
                    reaped, _ = os.waitpid(child, os.WNOHANG)
                    # A child that is still running yields pid 0,
                    # which discard() silently ignores.
                    children.discard(reaped)
                except ChildProcessError:
                    # Someone else reaped it.
                    children.discard(child)
                except OSError:
                    pass

        def handle_timeout(self):
            """Reap zombies after self.timeout seconds of inactivity.

            May be extended, do not override.
            """
            self.collect_children()

        def service_actions(self):
            """Reap zombie child processes regularly.

            service_actions is called in the BaseServer's serve_forever loop.
            """
            self.collect_children()

        def process_request(self, request, client_address):
            """Fork a new subprocess to process the request."""
            pid = os.fork()
            if not pid:
                # Child process.
                # This must never return, hence os._exit()!
                status = 1
                try:
                    self.finish_request(request, client_address)
                    status = 0
                except Exception:
                    self.handle_error(request, client_address)
                finally:
                    try:
                        self.shutdown_request(request)
                    finally:
                        os._exit(status)

            # Parent process: remember the child and drop our copy of
            # the request.
            if self.active_children is None:
                self.active_children = set()
            self.active_children.add(pid)
            self.close_request(request)
            return
|
||||
|
||||
|
||||
class ThreadingMixIn:
    """Mix-in class that serves each request in a new thread."""

    # Copied onto each worker thread's ``daemon`` attribute, i.e. it
    # decides how the threads act when the main process terminates.
    daemon_threads = False

    def process_request_thread(self, request, client_address):
        """Thread body: same job as BaseServer.process_request().

        Exception handling also happens here: an Exception from
        finish_request() goes to handle_error(), and the request is
        shut down in every case.
        """
        try:
            self.finish_request(request, client_address)
        except Exception:
            self.handle_error(request, client_address)
        finally:
            self.shutdown_request(request)

    def process_request(self, request, client_address):
        """Start a new thread to process the request."""
        worker = threading.Thread(
            target=self.process_request_thread,
            args=(request, client_address),
        )
        worker.daemon = self.daemon_threads
        worker.start()
|
||||
|
||||
|
||||
# Ready-made combinations of a concurrency mix-in with a server class.
# The forking variants exist only where os.fork() is available.
if hasattr(os, "fork"):
    class ForkingUDPServer(ForkingMixIn, UDPServer):
        pass

    class ForkingTCPServer(ForkingMixIn, TCPServer):
        pass


class ThreadingUDPServer(ThreadingMixIn, UDPServer):
    pass


class ThreadingTCPServer(ThreadingMixIn, TCPServer):
    pass
|
||||
|
||||
# Unix-domain variants, defined only when the platform has AF_UNIX.
if hasattr(socket, 'AF_UNIX'):

    class UnixStreamServer(TCPServer):
        # Same as TCPServer except for the address family.
        address_family = socket.AF_UNIX

    class UnixDatagramServer(UDPServer):
        # Same as UDPServer except for the address family.
        address_family = socket.AF_UNIX

    class ThreadingUnixStreamServer(ThreadingMixIn, UnixStreamServer):
        pass

    class ThreadingUnixDatagramServer(ThreadingMixIn, UnixDatagramServer):
        pass
|
||||
|
||||
class BaseRequestHandler:

    """Base class for request handler classes.

    One instance is created per request.  The constructor stores the
    request, the client address and the server on the instance (as
    self.request, self.client_address and self.server), then runs
    setup(), handle() and finish() in that order.  finish() runs even
    when handle() raises; it does not run when setup() raises.

    To implement a service, derive a class that defines handle().  As
    every request gets its own instance, handle() is free to create
    further instance variables.

    """

    def __init__(self, request, client_address, server):
        self.request = request
        self.client_address = client_address
        self.server = server
        self.setup()
        try:
            self.handle()
        finally:
            self.finish()

    def setup(self):
        """Hook run before handle(); does nothing by default."""
        pass

    def handle(self):
        """Hook that services the request; does nothing by default."""
        pass

    def finish(self):
        """Hook run after handle(); does nothing by default."""
        pass
|
||||
|
||||
|
||||
# The following two classes make it possible to use the same service
|
||||
# class for stream or datagram servers.
|
||||
# Each class sets up these instance variables:
|
||||
# - rfile: a file object from which the request is read
|
||||
# - wfile: a file object to which the reply is written
|
||||
# When the handle() method returns, wfile is flushed properly
|
||||
|
||||
|
||||
class StreamRequestHandler(BaseRequestHandler):

    """Request handler that provides self.rfile and self.wfile for stream sockets."""

    # Default buffer sizes for rfile and wfile.
    # rfile is buffered because unbuffered reads could be really slow
    # for large data (a getc() call per byte).  wfile is unbuffered
    # because (a) a write() is often followed by a read, which needs
    # the line flushed, and (b) big writes to unbuffered files are
    # typically optimized by stdio even when big reads aren't.
    rbufsize = -1
    wbufsize = 0

    # A timeout to apply to the request socket, if not None.
    timeout = None

    # Disable nagle algorithm for this socket, if True.
    # Use only when wbufsize != 0, to avoid small packets.
    disable_nagle_algorithm = False

    def setup(self):
        """Configure the connection and create rfile and wfile."""
        conn = self.connection = self.request
        if self.timeout is not None:
            conn.settimeout(self.timeout)
        if self.disable_nagle_algorithm:
            conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, True)
        self.rfile = conn.makefile('rb', self.rbufsize)
        # An unbuffered writer sends straight to the socket; any other
        # buffer size goes through a regular file object.
        if self.wbufsize != 0:
            self.wfile = conn.makefile('wb', self.wbufsize)
        else:
            self.wfile = _SocketWriter(conn)

    def finish(self):
        """Flush wfile if it is still open, then close wfile and rfile."""
        if not self.wfile.closed:
            try:
                self.wfile.flush()
            except socket.error:
                # A final socket error may have occurred here, such as
                # the local error ECONNABORTED.
                pass
        self.wfile.close()
        self.rfile.close()
|
||||
|
||||
class _SocketWriter(BufferedIOBase):
|
||||
"""Simple writable BufferedIOBase implementation for a socket
|
||||
|
||||
Does not hold data in a buffer, avoiding any need to call flush()."""
|
||||
|
||||
def __init__(self, sock):
|
||||
self._sock = sock
|
||||
|
||||
def writable(self):
|
||||
return True
|
||||
|
||||
def write(self, b):
|
||||
self._sock.sendall(b)
|
||||
# XXX RustPython TODO: implement memoryview properly
|
||||
#with memoryview(b) as view:
|
||||
# return view.nbytes
|
||||
return len(b)
|
||||
|
||||
def fileno(self):
|
||||
return self._sock.fileno()
|
||||
|
||||
class DatagramRequestHandler(BaseRequestHandler):

    """Request handler that provides self.rfile and self.wfile for datagram sockets."""

    def setup(self):
        """Unpack the request and wrap it in in-memory files.

        self.request is a (packet, socket) pair.  rfile reads from the
        received packet; wfile collects the reply in memory.
        """
        from io import BytesIO
        packet, sock = self.request
        self.packet, self.socket = packet, sock
        self.rfile = BytesIO(packet)
        self.wfile = BytesIO()

    def finish(self):
        """Send everything written to wfile back to the client address."""
        reply = self.wfile.getvalue()
        self.socket.sendto(reply, self.client_address)
|
||||
272
Lib/stringprep.py
vendored
Normal file
272
Lib/stringprep.py
vendored
Normal file
@@ -0,0 +1,272 @@
|
||||
# This file is generated by mkstringprep.py. DO NOT EDIT.
|
||||
"""Library that exposes various tables found in the StringPrep RFC 3454.
|
||||
|
||||
There are two kinds of tables: sets, for which a member test is provided,
|
||||
and mappings, for which a mapping function is provided.
|
||||
"""
|
||||
|
||||
from unicodedata import ucd_3_2_0 as unicodedata
|
||||
|
||||
assert unicodedata.unidata_version == '3.2.0'
|
||||
|
||||
def in_table_a1(code):
    """Return True if *code* belongs to table A.1.

    That is: its category is 'Cn', it lies outside U+FDD0..U+FDEF, and
    the low 16 bits of its code point are neither FFFE nor FFFF.
    """
    if unicodedata.category(code) == 'Cn':
        cp = ord(code)
        in_fdd0_block = 0xFDD0 <= cp < 0xFDF0
        return not in_fdd0_block and (cp & 0xFFFF) not in (0xFFFE, 0xFFFF)
    return False
|
||||
|
||||
|
||||
# Code points of table B.1.
b1_set = {
    173, 847, 6150, 6155, 6156, 6157, 8203, 8204, 8205, 8288, 65279,
    *range(65024, 65040),
}

def in_table_b1(code):
    """Return True if the character *code* is listed in table B.1."""
    return ord(code) in b1_set
|
||||
|
||||
|
||||
b3_exceptions = {
|
||||
0xb5:'\u03bc', 0xdf:'ss', 0x130:'i\u0307', 0x149:'\u02bcn',
|
||||
0x17f:'s', 0x1f0:'j\u030c', 0x345:'\u03b9', 0x37a:' \u03b9',
|
||||
0x390:'\u03b9\u0308\u0301', 0x3b0:'\u03c5\u0308\u0301', 0x3c2:'\u03c3', 0x3d0:'\u03b2',
|
||||
0x3d1:'\u03b8', 0x3d2:'\u03c5', 0x3d3:'\u03cd', 0x3d4:'\u03cb',
|
||||
0x3d5:'\u03c6', 0x3d6:'\u03c0', 0x3f0:'\u03ba', 0x3f1:'\u03c1',
|
||||
0x3f2:'\u03c3', 0x3f5:'\u03b5', 0x587:'\u0565\u0582', 0x1e96:'h\u0331',
|
||||
0x1e97:'t\u0308', 0x1e98:'w\u030a', 0x1e99:'y\u030a', 0x1e9a:'a\u02be',
|
||||
0x1e9b:'\u1e61', 0x1f50:'\u03c5\u0313', 0x1f52:'\u03c5\u0313\u0300', 0x1f54:'\u03c5\u0313\u0301',
|
||||
0x1f56:'\u03c5\u0313\u0342', 0x1f80:'\u1f00\u03b9', 0x1f81:'\u1f01\u03b9', 0x1f82:'\u1f02\u03b9',
|
||||
0x1f83:'\u1f03\u03b9', 0x1f84:'\u1f04\u03b9', 0x1f85:'\u1f05\u03b9', 0x1f86:'\u1f06\u03b9',
|
||||
0x1f87:'\u1f07\u03b9', 0x1f88:'\u1f00\u03b9', 0x1f89:'\u1f01\u03b9', 0x1f8a:'\u1f02\u03b9',
|
||||
0x1f8b:'\u1f03\u03b9', 0x1f8c:'\u1f04\u03b9', 0x1f8d:'\u1f05\u03b9', 0x1f8e:'\u1f06\u03b9',
|
||||
0x1f8f:'\u1f07\u03b9', 0x1f90:'\u1f20\u03b9', 0x1f91:'\u1f21\u03b9', 0x1f92:'\u1f22\u03b9',
|
||||
0x1f93:'\u1f23\u03b9', 0x1f94:'\u1f24\u03b9', 0x1f95:'\u1f25\u03b9', 0x1f96:'\u1f26\u03b9',
|
||||
0x1f97:'\u1f27\u03b9', 0x1f98:'\u1f20\u03b9', 0x1f99:'\u1f21\u03b9', 0x1f9a:'\u1f22\u03b9',
|
||||
0x1f9b:'\u1f23\u03b9', 0x1f9c:'\u1f24\u03b9', 0x1f9d:'\u1f25\u03b9', 0x1f9e:'\u1f26\u03b9',
|
||||
0x1f9f:'\u1f27\u03b9', 0x1fa0:'\u1f60\u03b9', 0x1fa1:'\u1f61\u03b9', 0x1fa2:'\u1f62\u03b9',
|
||||
0x1fa3:'\u1f63\u03b9', 0x1fa4:'\u1f64\u03b9', 0x1fa5:'\u1f65\u03b9', 0x1fa6:'\u1f66\u03b9',
|
||||
0x1fa7:'\u1f67\u03b9', 0x1fa8:'\u1f60\u03b9', 0x1fa9:'\u1f61\u03b9', 0x1faa:'\u1f62\u03b9',
|
||||
0x1fab:'\u1f63\u03b9', 0x1fac:'\u1f64\u03b9', 0x1fad:'\u1f65\u03b9', 0x1fae:'\u1f66\u03b9',
|
||||
0x1faf:'\u1f67\u03b9', 0x1fb2:'\u1f70\u03b9', 0x1fb3:'\u03b1\u03b9', 0x1fb4:'\u03ac\u03b9',
|
||||
0x1fb6:'\u03b1\u0342', 0x1fb7:'\u03b1\u0342\u03b9', 0x1fbc:'\u03b1\u03b9', 0x1fbe:'\u03b9',
|
||||
0x1fc2:'\u1f74\u03b9', 0x1fc3:'\u03b7\u03b9', 0x1fc4:'\u03ae\u03b9', 0x1fc6:'\u03b7\u0342',
|
||||
0x1fc7:'\u03b7\u0342\u03b9', 0x1fcc:'\u03b7\u03b9', 0x1fd2:'\u03b9\u0308\u0300', 0x1fd3:'\u03b9\u0308\u0301',
|
||||
0x1fd6:'\u03b9\u0342', 0x1fd7:'\u03b9\u0308\u0342', 0x1fe2:'\u03c5\u0308\u0300', 0x1fe3:'\u03c5\u0308\u0301',
|
||||
0x1fe4:'\u03c1\u0313', 0x1fe6:'\u03c5\u0342', 0x1fe7:'\u03c5\u0308\u0342', 0x1ff2:'\u1f7c\u03b9',
|
||||
0x1ff3:'\u03c9\u03b9', 0x1ff4:'\u03ce\u03b9', 0x1ff6:'\u03c9\u0342', 0x1ff7:'\u03c9\u0342\u03b9',
|
||||
0x1ffc:'\u03c9\u03b9', 0x20a8:'rs', 0x2102:'c', 0x2103:'\xb0c',
|
||||
0x2107:'\u025b', 0x2109:'\xb0f', 0x210b:'h', 0x210c:'h',
|
||||
0x210d:'h', 0x2110:'i', 0x2111:'i', 0x2112:'l',
|
||||
0x2115:'n', 0x2116:'no', 0x2119:'p', 0x211a:'q',
|
||||
0x211b:'r', 0x211c:'r', 0x211d:'r', 0x2120:'sm',
|
||||
0x2121:'tel', 0x2122:'tm', 0x2124:'z', 0x2128:'z',
|
||||
0x212c:'b', 0x212d:'c', 0x2130:'e', 0x2131:'f',
|
||||
0x2133:'m', 0x213e:'\u03b3', 0x213f:'\u03c0', 0x2145:'d',
|
||||
0x3371:'hpa', 0x3373:'au', 0x3375:'ov', 0x3380:'pa',
|
||||
0x3381:'na', 0x3382:'\u03bca', 0x3383:'ma', 0x3384:'ka',
|
||||
0x3385:'kb', 0x3386:'mb', 0x3387:'gb', 0x338a:'pf',
|
||||
0x338b:'nf', 0x338c:'\u03bcf', 0x3390:'hz', 0x3391:'khz',
|
||||
0x3392:'mhz', 0x3393:'ghz', 0x3394:'thz', 0x33a9:'pa',
|
||||
0x33aa:'kpa', 0x33ab:'mpa', 0x33ac:'gpa', 0x33b4:'pv',
|
||||
0x33b5:'nv', 0x33b6:'\u03bcv', 0x33b7:'mv', 0x33b8:'kv',
|
||||
0x33b9:'mv', 0x33ba:'pw', 0x33bb:'nw', 0x33bc:'\u03bcw',
|
||||
0x33bd:'mw', 0x33be:'kw', 0x33bf:'mw', 0x33c0:'k\u03c9',
|
||||
0x33c1:'m\u03c9', 0x33c3:'bq', 0x33c6:'c\u2215kg', 0x33c7:'co.',
|
||||
0x33c8:'db', 0x33c9:'gy', 0x33cb:'hp', 0x33cd:'kk',
|
||||
0x33ce:'km', 0x33d7:'ph', 0x33d9:'ppm', 0x33da:'pr',
|
||||
0x33dc:'sv', 0x33dd:'wb', 0xfb00:'ff', 0xfb01:'fi',
|
||||
0xfb02:'fl', 0xfb03:'ffi', 0xfb04:'ffl', 0xfb05:'st',
|
||||
0xfb06:'st', 0xfb13:'\u0574\u0576', 0xfb14:'\u0574\u0565', 0xfb15:'\u0574\u056b',
|
||||
0xfb16:'\u057e\u0576', 0xfb17:'\u0574\u056d', 0x1d400:'a', 0x1d401:'b',
|
||||
0x1d402:'c', 0x1d403:'d', 0x1d404:'e', 0x1d405:'f',
|
||||
0x1d406:'g', 0x1d407:'h', 0x1d408:'i', 0x1d409:'j',
|
||||
0x1d40a:'k', 0x1d40b:'l', 0x1d40c:'m', 0x1d40d:'n',
|
||||
0x1d40e:'o', 0x1d40f:'p', 0x1d410:'q', 0x1d411:'r',
|
||||
0x1d412:'s', 0x1d413:'t', 0x1d414:'u', 0x1d415:'v',
|
||||
0x1d416:'w', 0x1d417:'x', 0x1d418:'y', 0x1d419:'z',
|
||||
0x1d434:'a', 0x1d435:'b', 0x1d436:'c', 0x1d437:'d',
|
||||
0x1d438:'e', 0x1d439:'f', 0x1d43a:'g', 0x1d43b:'h',
|
||||
0x1d43c:'i', 0x1d43d:'j', 0x1d43e:'k', 0x1d43f:'l',
|
||||
0x1d440:'m', 0x1d441:'n', 0x1d442:'o', 0x1d443:'p',
|
||||
0x1d444:'q', 0x1d445:'r', 0x1d446:'s', 0x1d447:'t',
|
||||
0x1d448:'u', 0x1d449:'v', 0x1d44a:'w', 0x1d44b:'x',
|
||||
0x1d44c:'y', 0x1d44d:'z', 0x1d468:'a', 0x1d469:'b',
|
||||
0x1d46a:'c', 0x1d46b:'d', 0x1d46c:'e', 0x1d46d:'f',
|
||||
0x1d46e:'g', 0x1d46f:'h', 0x1d470:'i', 0x1d471:'j',
|
||||
0x1d472:'k', 0x1d473:'l', 0x1d474:'m', 0x1d475:'n',
|
||||
0x1d476:'o', 0x1d477:'p', 0x1d478:'q', 0x1d479:'r',
|
||||
0x1d47a:'s', 0x1d47b:'t', 0x1d47c:'u', 0x1d47d:'v',
|
||||
0x1d47e:'w', 0x1d47f:'x', 0x1d480:'y', 0x1d481:'z',
|
||||
0x1d49c:'a', 0x1d49e:'c', 0x1d49f:'d', 0x1d4a2:'g',
|
||||
0x1d4a5:'j', 0x1d4a6:'k', 0x1d4a9:'n', 0x1d4aa:'o',
|
||||
0x1d4ab:'p', 0x1d4ac:'q', 0x1d4ae:'s', 0x1d4af:'t',
|
||||
0x1d4b0:'u', 0x1d4b1:'v', 0x1d4b2:'w', 0x1d4b3:'x',
|
||||
0x1d4b4:'y', 0x1d4b5:'z', 0x1d4d0:'a', 0x1d4d1:'b',
|
||||
0x1d4d2:'c', 0x1d4d3:'d', 0x1d4d4:'e', 0x1d4d5:'f',
|
||||
0x1d4d6:'g', 0x1d4d7:'h', 0x1d4d8:'i', 0x1d4d9:'j',
|
||||
0x1d4da:'k', 0x1d4db:'l', 0x1d4dc:'m', 0x1d4dd:'n',
|
||||
0x1d4de:'o', 0x1d4df:'p', 0x1d4e0:'q', 0x1d4e1:'r',
|
||||
0x1d4e2:'s', 0x1d4e3:'t', 0x1d4e4:'u', 0x1d4e5:'v',
|
||||
0x1d4e6:'w', 0x1d4e7:'x', 0x1d4e8:'y', 0x1d4e9:'z',
|
||||
0x1d504:'a', 0x1d505:'b', 0x1d507:'d', 0x1d508:'e',
|
||||
0x1d509:'f', 0x1d50a:'g', 0x1d50d:'j', 0x1d50e:'k',
|
||||
0x1d50f:'l', 0x1d510:'m', 0x1d511:'n', 0x1d512:'o',
|
||||
0x1d513:'p', 0x1d514:'q', 0x1d516:'s', 0x1d517:'t',
|
||||
0x1d518:'u', 0x1d519:'v', 0x1d51a:'w', 0x1d51b:'x',
|
||||
0x1d51c:'y', 0x1d538:'a', 0x1d539:'b', 0x1d53b:'d',
|
||||
0x1d53c:'e', 0x1d53d:'f', 0x1d53e:'g', 0x1d540:'i',
|
||||
0x1d541:'j', 0x1d542:'k', 0x1d543:'l', 0x1d544:'m',
|
||||
0x1d546:'o', 0x1d54a:'s', 0x1d54b:'t', 0x1d54c:'u',
|
||||
0x1d54d:'v', 0x1d54e:'w', 0x1d54f:'x', 0x1d550:'y',
|
||||
0x1d56c:'a', 0x1d56d:'b', 0x1d56e:'c', 0x1d56f:'d',
|
||||
0x1d570:'e', 0x1d571:'f', 0x1d572:'g', 0x1d573:'h',
|
||||
0x1d574:'i', 0x1d575:'j', 0x1d576:'k', 0x1d577:'l',
|
||||
0x1d578:'m', 0x1d579:'n', 0x1d57a:'o', 0x1d57b:'p',
|
||||
0x1d57c:'q', 0x1d57d:'r', 0x1d57e:'s', 0x1d57f:'t',
|
||||
0x1d580:'u', 0x1d581:'v', 0x1d582:'w', 0x1d583:'x',
|
||||
0x1d584:'y', 0x1d585:'z', 0x1d5a0:'a', 0x1d5a1:'b',
|
||||
0x1d5a2:'c', 0x1d5a3:'d', 0x1d5a4:'e', 0x1d5a5:'f',
|
||||
0x1d5a6:'g', 0x1d5a7:'h', 0x1d5a8:'i', 0x1d5a9:'j',
|
||||
0x1d5aa:'k', 0x1d5ab:'l', 0x1d5ac:'m', 0x1d5ad:'n',
|
||||
0x1d5ae:'o', 0x1d5af:'p', 0x1d5b0:'q', 0x1d5b1:'r',
|
||||
0x1d5b2:'s', 0x1d5b3:'t', 0x1d5b4:'u', 0x1d5b5:'v',
|
||||
0x1d5b6:'w', 0x1d5b7:'x', 0x1d5b8:'y', 0x1d5b9:'z',
|
||||
0x1d5d4:'a', 0x1d5d5:'b', 0x1d5d6:'c', 0x1d5d7:'d',
|
||||
0x1d5d8:'e', 0x1d5d9:'f', 0x1d5da:'g', 0x1d5db:'h',
|
||||
0x1d5dc:'i', 0x1d5dd:'j', 0x1d5de:'k', 0x1d5df:'l',
|
||||
0x1d5e0:'m', 0x1d5e1:'n', 0x1d5e2:'o', 0x1d5e3:'p',
|
||||
0x1d5e4:'q', 0x1d5e5:'r', 0x1d5e6:'s', 0x1d5e7:'t',
|
||||
0x1d5e8:'u', 0x1d5e9:'v', 0x1d5ea:'w', 0x1d5eb:'x',
|
||||
0x1d5ec:'y', 0x1d5ed:'z', 0x1d608:'a', 0x1d609:'b',
|
||||
0x1d60a:'c', 0x1d60b:'d', 0x1d60c:'e', 0x1d60d:'f',
|
||||
0x1d60e:'g', 0x1d60f:'h', 0x1d610:'i', 0x1d611:'j',
|
||||
0x1d612:'k', 0x1d613:'l', 0x1d614:'m', 0x1d615:'n',
|
||||
0x1d616:'o', 0x1d617:'p', 0x1d618:'q', 0x1d619:'r',
|
||||
0x1d61a:'s', 0x1d61b:'t', 0x1d61c:'u', 0x1d61d:'v',
|
||||
0x1d61e:'w', 0x1d61f:'x', 0x1d620:'y', 0x1d621:'z',
|
||||
0x1d63c:'a', 0x1d63d:'b', 0x1d63e:'c', 0x1d63f:'d',
|
||||
0x1d640:'e', 0x1d641:'f', 0x1d642:'g', 0x1d643:'h',
|
||||
0x1d644:'i', 0x1d645:'j', 0x1d646:'k', 0x1d647:'l',
|
||||
0x1d648:'m', 0x1d649:'n', 0x1d64a:'o', 0x1d64b:'p',
|
||||
0x1d64c:'q', 0x1d64d:'r', 0x1d64e:'s', 0x1d64f:'t',
|
||||
0x1d650:'u', 0x1d651:'v', 0x1d652:'w', 0x1d653:'x',
|
||||
0x1d654:'y', 0x1d655:'z', 0x1d670:'a', 0x1d671:'b',
|
||||
0x1d672:'c', 0x1d673:'d', 0x1d674:'e', 0x1d675:'f',
|
||||
0x1d676:'g', 0x1d677:'h', 0x1d678:'i', 0x1d679:'j',
|
||||
0x1d67a:'k', 0x1d67b:'l', 0x1d67c:'m', 0x1d67d:'n',
|
||||
0x1d67e:'o', 0x1d67f:'p', 0x1d680:'q', 0x1d681:'r',
|
||||
0x1d682:'s', 0x1d683:'t', 0x1d684:'u', 0x1d685:'v',
|
||||
0x1d686:'w', 0x1d687:'x', 0x1d688:'y', 0x1d689:'z',
|
||||
0x1d6a8:'\u03b1', 0x1d6a9:'\u03b2', 0x1d6aa:'\u03b3', 0x1d6ab:'\u03b4',
|
||||
0x1d6ac:'\u03b5', 0x1d6ad:'\u03b6', 0x1d6ae:'\u03b7', 0x1d6af:'\u03b8',
|
||||
0x1d6b0:'\u03b9', 0x1d6b1:'\u03ba', 0x1d6b2:'\u03bb', 0x1d6b3:'\u03bc',
|
||||
0x1d6b4:'\u03bd', 0x1d6b5:'\u03be', 0x1d6b6:'\u03bf', 0x1d6b7:'\u03c0',
|
||||
0x1d6b8:'\u03c1', 0x1d6b9:'\u03b8', 0x1d6ba:'\u03c3', 0x1d6bb:'\u03c4',
|
||||
0x1d6bc:'\u03c5', 0x1d6bd:'\u03c6', 0x1d6be:'\u03c7', 0x1d6bf:'\u03c8',
|
||||
0x1d6c0:'\u03c9', 0x1d6d3:'\u03c3', 0x1d6e2:'\u03b1', 0x1d6e3:'\u03b2',
|
||||
0x1d6e4:'\u03b3', 0x1d6e5:'\u03b4', 0x1d6e6:'\u03b5', 0x1d6e7:'\u03b6',
|
||||
0x1d6e8:'\u03b7', 0x1d6e9:'\u03b8', 0x1d6ea:'\u03b9', 0x1d6eb:'\u03ba',
|
||||
0x1d6ec:'\u03bb', 0x1d6ed:'\u03bc', 0x1d6ee:'\u03bd', 0x1d6ef:'\u03be',
|
||||
0x1d6f0:'\u03bf', 0x1d6f1:'\u03c0', 0x1d6f2:'\u03c1', 0x1d6f3:'\u03b8',
|
||||
0x1d6f4:'\u03c3', 0x1d6f5:'\u03c4', 0x1d6f6:'\u03c5', 0x1d6f7:'\u03c6',
|
||||
0x1d6f8:'\u03c7', 0x1d6f9:'\u03c8', 0x1d6fa:'\u03c9', 0x1d70d:'\u03c3',
|
||||
0x1d71c:'\u03b1', 0x1d71d:'\u03b2', 0x1d71e:'\u03b3', 0x1d71f:'\u03b4',
|
||||
0x1d720:'\u03b5', 0x1d721:'\u03b6', 0x1d722:'\u03b7', 0x1d723:'\u03b8',
|
||||
0x1d724:'\u03b9', 0x1d725:'\u03ba', 0x1d726:'\u03bb', 0x1d727:'\u03bc',
|
||||
0x1d728:'\u03bd', 0x1d729:'\u03be', 0x1d72a:'\u03bf', 0x1d72b:'\u03c0',
|
||||
0x1d72c:'\u03c1', 0x1d72d:'\u03b8', 0x1d72e:'\u03c3', 0x1d72f:'\u03c4',
|
||||
0x1d730:'\u03c5', 0x1d731:'\u03c6', 0x1d732:'\u03c7', 0x1d733:'\u03c8',
|
||||
0x1d734:'\u03c9', 0x1d747:'\u03c3', 0x1d756:'\u03b1', 0x1d757:'\u03b2',
|
||||
0x1d758:'\u03b3', 0x1d759:'\u03b4', 0x1d75a:'\u03b5', 0x1d75b:'\u03b6',
|
||||
0x1d75c:'\u03b7', 0x1d75d:'\u03b8', 0x1d75e:'\u03b9', 0x1d75f:'\u03ba',
|
||||
0x1d760:'\u03bb', 0x1d761:'\u03bc', 0x1d762:'\u03bd', 0x1d763:'\u03be',
|
||||
0x1d764:'\u03bf', 0x1d765:'\u03c0', 0x1d766:'\u03c1', 0x1d767:'\u03b8',
|
||||
0x1d768:'\u03c3', 0x1d769:'\u03c4', 0x1d76a:'\u03c5', 0x1d76b:'\u03c6',
|
||||
0x1d76c:'\u03c7', 0x1d76d:'\u03c8', 0x1d76e:'\u03c9', 0x1d781:'\u03c3',
|
||||
0x1d790:'\u03b1', 0x1d791:'\u03b2', 0x1d792:'\u03b3', 0x1d793:'\u03b4',
|
||||
0x1d794:'\u03b5', 0x1d795:'\u03b6', 0x1d796:'\u03b7', 0x1d797:'\u03b8',
|
||||
0x1d798:'\u03b9', 0x1d799:'\u03ba', 0x1d79a:'\u03bb', 0x1d79b:'\u03bc',
|
||||
0x1d79c:'\u03bd', 0x1d79d:'\u03be', 0x1d79e:'\u03bf', 0x1d79f:'\u03c0',
|
||||
0x1d7a0:'\u03c1', 0x1d7a1:'\u03b8', 0x1d7a2:'\u03c3', 0x1d7a3:'\u03c4',
|
||||
0x1d7a4:'\u03c5', 0x1d7a5:'\u03c6', 0x1d7a6:'\u03c7', 0x1d7a7:'\u03c8',
|
||||
0x1d7a8:'\u03c9', 0x1d7bb:'\u03c3', }
|
||||
|
||||
def map_table_b3(code):
    """Return the table B.3 mapping of the character *code*.

    Characters listed in b3_exceptions map to their stored replacement;
    every other character maps to its lower-cased form.
    """
    special = b3_exceptions.get(ord(code))
    return code.lower() if special is None else special
|
||||
|
||||
|
||||
def map_table_b2(a):
    """Return the table B.2 mapping of the character *a*.

    The B.3 mapping of *a* is NFKC-normalized, mapped through B.3
    again character by character, and normalized once more.  If the
    second round changed the text, that result is returned; otherwise
    the plain B.3 mapping of *a* is.
    """
    folded = map_table_b3(a)
    normalized = unicodedata.normalize("NFKC", folded)
    refolded = "".join([map_table_b3(ch) for ch in normalized])
    renormalized = unicodedata.normalize("NFKC", refolded)
    return folded if normalized == renormalized else renormalized
|
||||
|
||||
|
||||
def in_table_c11(code):
    """Return True if *code* is in table C.1.1, i.e. is the ASCII space."""
    return code == " "
|
||||
|
||||
|
||||
def in_table_c12(code):
    """Return True if *code* is in table C.1.2.

    That is: its category is "Zs" and it is not the ASCII space.
    """
    is_space_separator = unicodedata.category(code) == "Zs"
    return is_space_separator and code != " "
|
||||
|
||||
def in_table_c11_c12(code):
    """Return True if *code* is in table C.1.1 or C.1.2 (category "Zs")."""
    return "Zs" == unicodedata.category(code)
|
||||
|
||||
|
||||
def in_table_c21(code):
    """Return True if *code* is in table C.2.1.

    That is: its code point is below 128 and its category is "Cc".
    """
    is_ascii = ord(code) < 128
    return is_ascii and unicodedata.category(code) == "Cc"
|
||||
|
||||
# Code points that in_table_c22 and in_table_c21_c22 accept regardless
# of their category.
c22_specials = {
    1757, 1807, 6158, 8204, 8205, 8232, 8233, 65279,
    *range(8288, 8292),
    *range(8298, 8304),
    *range(65529, 65533),
    *range(119155, 119163),
}

def in_table_c22(code):
    """Return True if *code* is in table C.2.2.

    That is: its code point is 128 or above and it either has
    category "Cc" or is listed in c22_specials.
    """
    cp = ord(code)
    return cp >= 128 and (
        unicodedata.category(code) == "Cc" or cp in c22_specials
    )

def in_table_c21_c22(code):
    """Return True if *code* is in table C.2.1 or C.2.2.

    That is: it has category "Cc" or is listed in c22_specials.
    """
    if unicodedata.category(code) == "Cc":
        return True
    return ord(code) in c22_specials
|
||||
|
||||
|
||||
def in_table_c3(code):
    """Return True if *code* is in table C.3, i.e. has category "Co"."""
    return "Co" == unicodedata.category(code)
|
||||
|
||||
|
||||
def in_table_c4(code):
    """Return True if *code* is in table C.4.

    That is: it lies in U+FDD0..U+FDEF, or the low 16 bits of its code
    point are FFFE or FFFF.
    """
    cp = ord(code)
    # Nothing below U+FDD0 can end in FFFE/FFFF, so no separate lower
    # bound is needed for the second test.
    return 0xFDD0 <= cp < 0xFDF0 or (cp & 0xFFFF) in (0xFFFE, 0xFFFF)
|
||||
|
||||
|
||||
def in_table_c5(code):
    """Return True if *code* is in table C.5, i.e. has category "Cs"."""
    return "Cs" == unicodedata.category(code)
|
||||
|
||||
|
||||
# Code points of table C.6: 65529 through 65533.
c6_set = set(range(65529, 65534))

def in_table_c6(code):
    """Return True if the character *code* is listed in table C.6."""
    cp = ord(code)
    return cp in c6_set
|
||||
|
||||
|
||||
# Code points of table C.7: 12272 through 12283.
c7_set = set(range(12272, 12284))

def in_table_c7(code):
    """Return True if the character *code* is listed in table C.7."""
    cp = ord(code)
    return cp in c7_set
|
||||
|
||||
|
||||
# Code points of table C.8.
c8_set = {832, 833, 8206, 8207, *range(8234, 8239), *range(8298, 8304)}

def in_table_c8(code):
    """Return True if the character *code* is listed in table C.8."""
    cp = ord(code)
    return cp in c8_set
|
||||
|
||||
|
||||
# Code points of table C.9.
c9_set = {917505, *range(917536, 917632)}

def in_table_c9(code):
    """Return True if the character *code* is listed in table C.9."""
    cp = ord(code)
    return cp in c9_set
|
||||
|
||||
|
||||
def in_table_d1(code):
    """Return True if *code* is in table D.1 (bidirectional class "R" or "AL")."""
    bidi_class = unicodedata.bidirectional(code)
    return bidi_class in ("R","AL")
|
||||
|
||||
|
||||
def in_table_d2(code):
    """Return True if *code* is in table D.2 (bidirectional class "L")."""
    bidi_class = unicodedata.bidirectional(code)
    return bidi_class == "L"
|
||||
0
Lib/urllib/__init__.py
Normal file
0
Lib/urllib/__init__.py
Normal file
81
Lib/urllib/error.py
Normal file
81
Lib/urllib/error.py
Normal file
@@ -0,0 +1,81 @@
|
||||
"""Exception classes raised by urllib.
|
||||
|
||||
The base exception class is URLError, which inherits from OSError. It
|
||||
doesn't define any behavior of its own, but is the base class for all
|
||||
exceptions defined in this package.
|
||||
|
||||
HTTPError is an exception class that is also a valid HTTP response
|
||||
instance. It behaves this way because HTTP protocol errors are valid
|
||||
responses, with a status code, headers, and a body. In some contexts,
|
||||
an application may want to handle an exception like a regular
|
||||
response.
|
||||
"""
|
||||
|
||||
import urllib.response
|
||||
|
||||
__all__ = ['URLError', 'HTTPError', 'ContentTooShortError']
|
||||
|
||||
|
||||
# do these error classes make sense?
|
||||
# make sure all of the OSError stuff is overridden. we just want to be
|
||||
# subtypes.
|
||||
|
||||
class URLError(OSError):
    """Base exception of this module; an OSError subtype carrying a reason.

    None of OSError's own initialisation is reused: __init__ and __str__
    are overridden.  ``args`` is populated so generic exception handling
    keeps working, but it holds only the reason rather than the usual
    (errno, strerror) pair.
    """

    def __init__(self, reason, filename=None):
        """Record *reason* and, when one is supplied, *filename*."""
        self.reason = reason
        self.args = (reason,)
        # The attribute is assigned only when a filename was actually given.
        if filename is not None:
            self.filename = filename

    def __str__(self):
        """Return the reason wrapped in a '<urlopen error ...>' marker."""
        return '<urlopen error %s>' % self.reason
|
||||
|
||||
|
||||
class HTTPError(URLError, urllib.response.addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return"""

    # Private alias for the response-side initialiser, called explicitly
    # from __init__ below.
    __super_init = urllib.response.addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        """Store the response details and, if possible, wrap *fp*.

        url  -- stored as ``filename``
        code -- stored as ``code``
        msg  -- stored as ``msg`` (also exposed through ``reason``)
        hdrs -- stored as ``hdrs`` (also exposed through ``headers``)
        fp   -- file object for the response body, or None
        """
        self.filename = url
        self.fp = fp
        self.hdrs = hdrs
        self.msg = msg
        self.code = code
        # The addinfourl machinery needs a real file object.  When there is
        # none, the simplest workaround is to leave that base class
        # uninitialised.
        if fp is None:
            return
        self.__super_init(fp, hdrs, url, code)

    def __str__(self):
        """Return 'HTTP Error <code>: <msg>'."""
        return 'HTTP Error %s: %s' % (self.code, self.msg)

    def __repr__(self):
        """Return '<HTTPError <code>: <repr of msg>>'."""
        return '<HTTPError %s: %r>' % (self.code, self.msg)

    # URLError exposes .reason, so HTTPError provides it as well
    # (see issue13211 for the discussion).
    @property
    def reason(self):
        """The message text, under the attribute name URLError uses."""
        return self.msg

    @property
    def headers(self):
        """The headers, backed by the ``hdrs`` attribute."""
        return self.hdrs

    @headers.setter
    def headers(self, headers):
        self.hdrs = headers
|
||||
|
||||
|
||||
class ContentTooShortError(URLError):
    """Exception raised when downloaded size does not match content-length."""

    def __init__(self, message, content):
        """Keep *content* on the instance and pass *message* on as the reason."""
        self.content = content
        URLError.__init__(self, message)
|
||||
1009
Lib/urllib/parse.py
Normal file
1009
Lib/urllib/parse.py
Normal file
File diff suppressed because it is too large
Load Diff
2743
Lib/urllib/request.py
Normal file
2743
Lib/urllib/request.py
Normal file
File diff suppressed because it is too large
Load Diff
80
Lib/urllib/response.py
Normal file
80
Lib/urllib/response.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""Response classes used by urllib.
|
||||
|
||||
The base class, addbase, defines a minimal file-like interface,
|
||||
including read() and readline(). The typical response object is an
|
||||
addinfourl instance, which defines an info() method that returns
|
||||
headers and a geturl() method that returns the url.
|
||||
"""
|
||||
|
||||
import tempfile
|
||||
|
||||
__all__ = ['addbase', 'addclosehook', 'addinfo', 'addinfourl']
|
||||
|
||||
|
||||
class addbase(tempfile._TemporaryFileWrapper):
    """Base class for addinfo and addclosehook. Is a good idea for garbage collection."""

    # XXX Add a method to expose the timeout on the underlying socket?

    def __init__(self, fp):
        """Wrap the file object *fp* without deleting anything on close."""
        super().__init__(fp, '<urllib response>', delete=False)
        # The original API exposed the wrapped object as .fp; keep it.
        self.fp = fp

    def __repr__(self):
        """Return the class name, the object id and the wrapped file."""
        details = (self.__class__.__name__, id(self), self.file)
        return '<%s at %r whose fp = %r>' % details

    def __enter__(self):
        """Return self, refusing to enter once the wrapped file is closed."""
        if not self.fp.closed:
            return self
        raise ValueError("I/O operation on closed file")

    def __exit__(self, type, value, traceback):
        """Close the wrapper when the with-block ends."""
        self.close()
|
||||
|
||||
|
||||
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        """Wrap *fp*; closehook(*hookargs) is invoked when it is closed."""
        super().__init__(fp)
        self.hookargs = hookargs
        self.closehook = closehook

    def close(self):
        """Run the hook at most once, then always close the wrapped file."""
        try:
            hook, arguments = self.closehook, self.hookargs
            if hook:
                # Clear the stored hook before calling it so that a second
                # close() does not run it again.
                self.closehook = None
                self.hookargs = None
                hook(*arguments)
        finally:
            super().close()
|
||||
|
||||
|
||||
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        """Wrap *fp* and remember *headers*."""
        super().__init__(fp)
        self.headers = headers

    def info(self):
        """Return the headers given at construction time."""
        return self.headers
|
||||
|
||||
|
||||
class addinfourl(addinfo):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        """Wrap *fp* with *headers*, recording *url* and the optional *code*."""
        super().__init__(fp, headers)
        self.code = code
        self.url = url

    def getcode(self):
        """Return the code given at construction time (None by default)."""
        return self.code

    def geturl(self):
        """Return the url given at construction time."""
        return self.url
|
||||
251
Lib/urllib/robotparser.py
Normal file
251
Lib/urllib/robotparser.py
Normal file
@@ -0,0 +1,251 @@
|
||||
""" robotparser.py
|
||||
|
||||
Copyright (C) 2000 Bastian Kleineidam
|
||||
|
||||
You can choose between two licenses when using this package:
|
||||
1) GNU GPLv2
|
||||
2) PSF license for Python 2.2
|
||||
|
||||
The robots.txt Exclusion Protocol is implemented as specified in
|
||||
http://www.robotstxt.org/norobots-rfc.txt
|
||||
"""
|
||||
|
||||
import collections
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
__all__ = ["RobotFileParser"]
|
||||
|
||||
class RobotFileParser:
    """ This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    """

    # Record type stored on an entry for a valid "Request-rate:" line and
    # handed back by request_rate().
    _RequestRate = collections.namedtuple('RequestRate', 'requests seconds')

    def __init__(self, url=''):
        """Create an empty parser, optionally pointed at the robots.txt *url*."""
        self.entries = []
        self.default_entry = None
        self.disallow_all = False
        self.allow_all = False
        self.set_url(url)
        self.last_checked = 0

    def mtime(self):
        """Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        """
        return self.last_checked

    def modified(self):
        """Sets the time the robots.txt file was last fetched to the
        current time.

        """
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Sets the URL referring to a robots.txt file."""
        self.url = url
        self.host, self.path = urllib.parse.urlparse(url)[1:3]

    def read(self):
        """Reads the robots.txt URL and feeds it to the parser."""
        try:
            f = urllib.request.urlopen(self.url)
        except urllib.error.HTTPError as err:
            # 401/403 block everything; any other 4xx allows everything.
            if err.code in (401, 403):
                self.disallow_all = True
            elif err.code >= 400 and err.code < 500:
                self.allow_all = True
        else:
            raw = f.read()
            self.parse(raw.decode("utf-8").splitlines())

    def _add_entry(self, entry):
        """File *entry* as the default ("*") entry or as a regular one."""
        if "*" in entry.useragents:
            # the default entry is considered last
            if self.default_entry is None:
                # the first default entry wins
                self.default_entry = entry
        else:
            self.entries.append(entry)

    def parse(self, lines):
        """Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        """
        # states:
        #   0: start state
        #   1: saw user-agent line
        #   2: saw an allow or disallow line
        state = 0
        entry = Entry()

        self.modified()
        for line in lines:
            if not line:
                # A blank line ends the current record.
                if state == 1:
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self._add_entry(entry)
                    entry = Entry()
                    state = 0
            # remove optional comment and strip line
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            line = line.split(':', 1)
            if len(line) == 2:
                line[0] = line[0].strip().lower()
                line[1] = urllib.parse.unquote(line[1].strip())
                if line[0] == "user-agent":
                    if state == 2:
                        self._add_entry(entry)
                        entry = Entry()
                    entry.useragents.append(line[1])
                    state = 1
                elif line[0] == "disallow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], False))
                        state = 2
                elif line[0] == "allow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], True))
                        state = 2
                elif line[0] == "crawl-delay":
                    if state != 0:
                        # Only convert values that are plain digits, so a
                        # malformed robots.txt cannot make int() raise.
                        if line[1].strip().isdigit():
                            entry.delay = int(line[1])
                        state = 2
                elif line[0] == "request-rate":
                    if state != 0:
                        numbers = line[1].split('/')
                        # check if all values are sane
                        if (len(numbers) == 2 and numbers[0].strip().isdigit()
                                and numbers[1].strip().isdigit()):
                            # Store a real record instance rather than a
                            # namedtuple class with patched class attributes.
                            entry.req_rate = self._RequestRate(
                                int(numbers[0]), int(numbers[1]))
                        state = 2
        if state == 2:
            self._add_entry(entry)

    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        if self.disallow_all:
            return False
        if self.allow_all:
            return True
        # Until the robots.txt file has been read or found not
        # to exist, we must assume that no url is allowable.
        # This prevents false positives when a user erroneously
        # calls can_fetch() before calling read().
        if not self.last_checked:
            return False
        # search for given user agent matches
        # the first match counts
        parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
        url = urllib.parse.urlunparse(('', '', parsed_url.path,
            parsed_url.params, parsed_url.query, parsed_url.fragment))
        url = urllib.parse.quote(url)
        if not url:
            url = "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # try the default entry last
        if self.default_entry:
            return self.default_entry.allowance(url)
        # agent not found ==> access granted
        return True

    def crawl_delay(self, useragent):
        """Return the crawl delay that applies to *useragent*.

        Returns None when nothing has been parsed yet, when no entry
        matches and there is no default entry, or when the applicable
        entry has no valid "Crawl-delay:" line.
        """
        if not self.mtime():
            return None
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.delay
        # A robots.txt without a "*" record leaves default_entry as None.
        if self.default_entry:
            return self.default_entry.delay
        return None

    def request_rate(self, useragent):
        """Return the request rate that applies to *useragent*.

        The result has ``requests`` and ``seconds`` attributes.  Returns
        None when nothing has been parsed yet, when no entry matches and
        there is no default entry, or when the applicable entry has no
        valid "Request-rate:" line.
        """
        if not self.mtime():
            return None
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.req_rate
        # A robots.txt without a "*" record leaves default_entry as None.
        if self.default_entry:
            return self.default_entry.req_rate
        return None

    def __str__(self):
        """Return the regular (non-default) entries, one block per entry."""
        return ''.join([str(entry) + "\n" for entry in self.entries])
|
||||
|
||||
|
||||
class RuleLine:
|
||||
"""A rule line is a single "Allow:" (allowance==True) or "Disallow:"
|
||||
(allowance==False) followed by a path."""
|
||||
def __init__(self, path, allowance):
|
||||
if path == '' and not allowance:
|
||||
# an empty value means allow all
|
||||
allowance = True
|
||||
path = urllib.parse.urlunparse(urllib.parse.urlparse(path))
|
||||
self.path = urllib.parse.quote(path)
|
||||
self.allowance = allowance
|
||||
|
||||
def applies_to(self, filename):
|
||||
return self.path == "*" or filename.startswith(self.path)
|
||||
|
||||
def __str__(self):
|
||||
return ("Allow" if self.allowance else "Disallow") + ": " + self.path
|
||||
|
||||
|
||||
class Entry:
    """An entry has one or more user-agents and zero or more rulelines"""

    def __init__(self):
        """Start with no agents, no rules and no delay or request rate."""
        self.req_rate = None
        self.delay = None
        self.rulelines = []
        self.useragents = []

    def __str__(self):
        """Render the user-agent lines followed by the rule lines."""
        agent_lines = ["User-agent: " + agent + "\n"
                       for agent in self.useragents]
        rule_lines = [str(rule) + "\n" for rule in self.rulelines]
        return ''.join(agent_lines + rule_lines)

    def applies_to(self, useragent):
        """check if this entry applies to the specified agent"""
        # Only the name token (the part before any "/") is compared,
        # case-insensitively.
        name_token = useragent.split("/")[0].lower()
        # "*" is the catch-all agent; any other agent matches when it
        # occurs inside the name token.
        return any(agent == '*' or agent.lower() in name_token
                   for agent in self.useragents)

    def allowance(self, filename):
        """Preconditions:
        - our agent applies to this entry
        - filename is URL decoded"""
        # The first rule that applies decides; with no match, allow.
        matching = (rule.allowance for rule in self.rulelines
                    if rule.applies_to(filename))
        return next(matching, True)
|
||||
199
Lib/uu.py
vendored
Executable file
199
Lib/uu.py
vendored
Executable file
@@ -0,0 +1,199 @@
|
||||
#! /usr/bin/env python3
|
||||
|
||||
# Copyright 1994 by Lance Ellinghouse
|
||||
# Cathedral City, California Republic, United States of America.
|
||||
# All Rights Reserved
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose and without fee is hereby granted,
|
||||
# provided that the above copyright notice appear in all copies and that
|
||||
# both that copyright notice and this permission notice appear in
|
||||
# supporting documentation, and that the name of Lance Ellinghouse
|
||||
# not be used in advertising or publicity pertaining to distribution
|
||||
# of the software without specific, written prior permission.
|
||||
# LANCE ELLINGHOUSE DISCLAIMS ALL WARRANTIES WITH REGARD TO
|
||||
# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
# FITNESS, IN NO EVENT SHALL LANCE ELLINGHOUSE CENTRUM BE LIABLE
|
||||
# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
# Modified by Jack Jansen, CWI, July 1995:
|
||||
# - Use binascii module to do the actual line-by-line conversion
|
||||
# between ascii and binary. This results in a 1000-fold speedup. The C
|
||||
# version is still 5 times faster, though.
|
||||
# - Arguments more compliant with python standard
|
||||
|
||||
"""Implementation of the UUencode and UUdecode functions.
|
||||
|
||||
encode(in_file, out_file [,name, mode])
|
||||
decode(in_file [, out_file, mode])
|
||||
"""
|
||||
|
||||
import binascii
|
||||
import os
|
||||
import sys
|
||||
|
||||
__all__ = ["Error", "encode", "decode"]
|
||||
|
||||
class Error(Exception):
    """Exception type of this module."""
|
||||
|
||||
def encode(in_file, out_file, name=None, mode=None):
    """Uuencode file

    in_file  -- '-' for standard input, a pathname, or a binary file object
    out_file -- '-' for standard output, a pathname, or a binary file object
    name     -- file name written into the 'begin' header; defaults to the
                base name of in_file when that is a pathname, else '-'
    mode     -- permission bits written into the header; defaults to the
                mode of in_file when that is a pathname, else 0o666

    Files opened here from pathnames are closed before returning; file
    objects passed in by the caller are left open.
    """
    opened_files = []
    try:
        #
        # If in_file is a pathname open it and change defaults
        #
        if in_file == '-':
            in_file = sys.stdin.buffer
        elif isinstance(in_file, str):
            if name is None:
                name = os.path.basename(in_file)
            if mode is None:
                try:
                    mode = os.stat(in_file).st_mode
                except AttributeError:
                    pass
            in_file = open(in_file, 'rb')
            opened_files.append(in_file)
        #
        # Open out_file if it is a pathname
        #
        if out_file == '-':
            out_file = sys.stdout.buffer
        elif isinstance(out_file, str):
            out_file = open(out_file, 'wb')
            opened_files.append(out_file)
        #
        # Set defaults for name and mode
        #
        if name is None:
            name = '-'
        if mode is None:
            mode = 0o666
        #
        # The header is a single line, so a raw newline or carriage return
        # in the name would break it (or inject extra lines).  Escape them.
        #
        name = name.replace('\n', '\\n')
        name = name.replace('\r', '\\r')
        #
        # Write the data
        #
        out_file.write(('begin %o %s\n' % ((mode & 0o777), name)).encode("ascii"))
        # 45 input bytes produce one full uuencoded line.
        data = in_file.read(45)
        while len(data) > 0:
            out_file.write(binascii.b2a_uu(data))
            data = in_file.read(45)
        out_file.write(b' \nend\n')
    finally:
        for f in opened_files:
            f.close()
|
||||
|
||||
|
||||
def decode(in_file, out_file=None, mode=None, quiet=False):
    """Decode uuencoded file

    in_file  -- '-' for standard input, a pathname, or a binary file object
    out_file -- '-' for standard output, a pathname, a binary file object,
                or None to use the name from the 'begin' header (refusing
                to overwrite an existing file)
    mode     -- permission bits for an output file opened here from a
                pathname; defaults to the mode in the 'begin' header
    quiet    -- when true, do not write warnings about malformed lines to
                standard error

    Raises Error when no valid 'begin' line is found, when the file named
    in the header already exists, or when the input ends before 'end'.
    """
    #
    # Open the input file, if needed.
    #
    opened_files = []
    if in_file == '-':
        in_file = sys.stdin.buffer
    elif isinstance(in_file, str):
        in_file = open(in_file, 'rb')
        opened_files.append(in_file)

    try:
        #
        # Read until a begin is encountered or we've exhausted the file
        #
        while True:
            hdr = in_file.readline()
            if not hdr:
                raise Error('No valid begin line found in input file')
            if not hdr.startswith(b'begin'):
                continue
            hdrfields = hdr.split(b' ', 2)
            if len(hdrfields) == 3 and hdrfields[0] == b'begin':
                try:
                    # Accept the header only if the mode field is octal.
                    int(hdrfields[1], 8)
                    break
                except ValueError:
                    pass
        if out_file is None:
            # If the filename isn't ASCII, what's up with that?!?
            out_file = hdrfields[2].rstrip(b' \t\r\n\f').decode("ascii")
            if os.path.exists(out_file):
                raise Error('Cannot overwrite existing file: %s' % out_file)
        if mode is None:
            mode = int(hdrfields[1], 8)
        #
        # Open the output file
        #
        if out_file == '-':
            out_file = sys.stdout.buffer
        elif isinstance(out_file, str):
            fp = open(out_file, 'wb')
            # Register the file for cleanup before anything else can fail.
            opened_files.append(fp)
            try:
                # chmod lives in os, not os.path.
                os.chmod(out_file, mode)
            except AttributeError:
                # Tolerate runtimes whose os module has no chmod.
                pass
            out_file = fp
        #
        # Main decoding loop
        #
        s = in_file.readline()
        while s and s.strip(b' \t\r\n\f') != b'end':
            try:
                data = binascii.a2b_uu(s)
            except binascii.Error as v:
                # Workaround for broken uuencoders by /Fredrik Lundh:
                # retry using only the number of characters implied by the
                # line's length byte.
                nbytes = (((s[0]-32) & 63) * 4 + 5) // 3
                data = binascii.a2b_uu(s[:nbytes])
                if not quiet:
                    sys.stderr.write("Warning: %s\n" % v)
            out_file.write(data)
            s = in_file.readline()
        if not s:
            raise Error('Truncated input file')
    finally:
        for f in opened_files:
            f.close()
|
||||
|
||||
def test():
    """uuencode/uudecode main program"""

    import optparse
    parser = optparse.OptionParser(usage='usage: %prog [-d] [-t] [input [output]]')
    parser.add_option('-d', '--decode', dest='decode', help='Decode (instead of encode)?', default=False, action='store_true')
    parser.add_option('-t', '--text', dest='text', help='data is text, encoded format unix-compatible text?', default=False, action='store_true')

    (options, args) = parser.parse_args()
    if len(args) > 2:
        parser.error('incorrect number of arguments')
        sys.exit(1)

    # Default to the binary streams underlying stdin/stdout; positional
    # arguments, when present, replace them with pathnames.
    source = sys.stdin.buffer
    target = sys.stdout.buffer
    if len(args) > 0:
        source = args[0]
    if len(args) > 1:
        target = args[1]

    if options.decode:
        if options.text:
            # -t needs a named output file, not stdout.
            if not isinstance(target, str):
                print(sys.argv[0], ': cannot do -t to stdout')
                sys.exit(1)
            target = open(target, 'wb')
        decode(source, target)
        return

    if options.text:
        # -t needs a named input file, not stdin.
        if not isinstance(source, str):
            print(sys.argv[0], ': cannot do -t from stdin')
            sys.exit(1)
        source = open(source, 'rb')
    encode(source, target)
|
||||
|
||||
if __name__ == '__main__':
|
||||
test()
|
||||
615
Lib/uuid.py
vendored
Normal file
615
Lib/uuid.py
vendored
Normal file
@@ -0,0 +1,615 @@
|
||||
r"""UUID objects (universally unique identifiers) according to RFC 4122.
|
||||
|
||||
This module provides immutable UUID objects (class UUID) and the functions
|
||||
uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5
|
||||
UUIDs as specified in RFC 4122.
|
||||
|
||||
If all you want is a unique ID, you should probably call uuid1() or uuid4().
|
||||
Note that uuid1() may compromise privacy since it creates a UUID containing
|
||||
the computer's network address. uuid4() creates a random UUID.
|
||||
|
||||
Typical usage:
|
||||
|
||||
>>> import uuid
|
||||
|
||||
# make a UUID based on the host ID and current time
|
||||
>>> uuid.uuid1() # doctest: +SKIP
|
||||
UUID('a8098c1a-f86e-11da-bd1a-00112444be1e')
|
||||
|
||||
# make a UUID using an MD5 hash of a namespace UUID and a name
|
||||
>>> uuid.uuid3(uuid.NAMESPACE_DNS, 'python.org')
|
||||
UUID('6fa459ea-ee8a-3ca4-894e-db77e160355e')
|
||||
|
||||
# make a random UUID
|
||||
>>> uuid.uuid4() # doctest: +SKIP
|
||||
UUID('16fd2706-8baf-433b-82eb-8c7fada847da')
|
||||
|
||||
# make a UUID using a SHA-1 hash of a namespace UUID and a name
|
||||
>>> uuid.uuid5(uuid.NAMESPACE_DNS, 'python.org')
|
||||
UUID('886313e1-3b8a-5372-9b90-0c9aee199e5d')
|
||||
|
||||
# make a UUID from a string of hex digits (braces and hyphens ignored)
|
||||
>>> x = uuid.UUID('{00010203-0405-0607-0809-0a0b0c0d0e0f}')
|
||||
|
||||
# convert a UUID to a string of hex digits in standard form
|
||||
>>> str(x)
|
||||
'00010203-0405-0607-0809-0a0b0c0d0e0f'
|
||||
|
||||
# get the raw 16 bytes of the UUID
|
||||
>>> x.bytes
|
||||
b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
|
||||
|
||||
# make a UUID from a 16-byte string
|
||||
>>> uuid.UUID(bytes=x.bytes)
|
||||
UUID('00010203-0405-0607-0809-0a0b0c0d0e0f')
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
__author__ = 'Ka-Ping Yee <ping@zesty.ca>'
|
||||
|
||||
RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, RESERVED_FUTURE = [
|
||||
'reserved for NCS compatibility', 'specified in RFC 4122',
|
||||
'reserved for Microsoft compatibility', 'reserved for future definition']
|
||||
|
||||
int_ = int # The built-in int type
|
||||
bytes_ = bytes # The built-in bytes type
|
||||
|
||||
class UUID(object):
    """An immutable, hashable, orderable UUID as specified in RFC 4122.

    str() of a UUID has the form '12345678-1234-1234-1234-123456789abc'.
    The value is stored as one 128-bit integer; every other view is
    derived from it.  The constructor accepts exactly one of five forms:

        hex       a string of 32 hexadecimal digits
        bytes     16 bytes, all fields in big-endian order
        bytes_le  16 bytes, first three fields in little-endian order
        fields    a tuple of six integers (32-bit, 16-bit, 16-bit, 8-bit,
                  8-bit and 48-bit)
        int       a single 128-bit integer

    Read-only attributes:

        bytes       the UUID as 16 bytes, big-endian
        bytes_le    the UUID as 16 bytes with time_low, time_mid and
                    time_hi_version in little-endian order
        fields      tuple of the six integer fields below

            time_low                the first 32 bits
            time_mid                the next 16 bits
            time_hi_version         the next 16 bits
            clock_seq_hi_variant    the next 8 bits
            clock_seq_low           the next 8 bits
            node                    the last 48 bits

        time        the 60-bit timestamp
        clock_seq   the 14-bit sequence number
        hex         the UUID as a 32-character hexadecimal string
        int         the UUID as a 128-bit integer
        urn         the UUID as a URN ('urn:uuid:' + str form)
        variant     one of RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT
                    or RESERVED_FUTURE
        version     the version number; None unless the variant is
                    RFC_4122
    """

    def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
                       int=None, version=None):
        r"""Build a UUID from exactly one of the five input forms.

        For 'hex', the 'urn:' and 'uuid:' prefixes, surrounding curly
        braces and hyphens are all removed before the 32 digits are read.
        These expressions all yield the same UUID:

        UUID('{12345678-1234-5678-1234-567812345678}')
        UUID('12345678123456781234567812345678')
        UUID('urn:uuid:12345678-1234-5678-1234-567812345678')
        UUID(bytes=b'\x12\x34\x56\x78'*4)
        UUID(bytes_le=b'\x78\x56\x34\x12\x34\x12\x78\x56' +
                      b'\x12\x34\x56\x78\x12\x34\x56\x78')
        UUID(fields=(0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678))
        UUID(int=0x12345678123456781234567812345678)

        Raises TypeError unless exactly one of the five forms is given and
        ValueError when the given value is malformed or out of range.  If
        'version' (1 through 5) is given, the variant and version bits of
        the result are overwritten according to RFC 4122.
        """

        if (hex, bytes, bytes_le, fields, int).count(None) != 4:
            raise TypeError('one of the hex, bytes, bytes_le, fields, or int arguments must be given')
        if hex is not None:
            digits = hex.replace('urn:', '').replace('uuid:', '')
            digits = digits.strip('{}').replace('-', '')
            if len(digits) != 32:
                raise ValueError('badly formed hexadecimal UUID string')
            int = int_(digits, 16)
        if bytes_le is not None:
            if len(bytes_le) != 16:
                raise ValueError('bytes_le is not a 16-char string')
            # Reverse the first three fields (4, 2 and 2 bytes) to get the
            # big-endian layout handled by the 'bytes' branch below.
            bytes = (bytes_le[3::-1] + bytes_le[5:3:-1] +
                     bytes_le[7:5:-1] + bytes_le[8:])
        if bytes is not None:
            if len(bytes) != 16:
                raise ValueError('bytes is not a 16-char string')
            assert isinstance(bytes, bytes_), repr(bytes)
            int = int_.from_bytes(bytes, byteorder='big')
        if fields is not None:
            if len(fields) != 6:
                raise ValueError('fields is not a 6-tuple')
            (time_low, time_mid, time_hi_version,
             clock_seq_hi_variant, clock_seq_low, node) = fields
            # (value, bit width, complaint) for each field, checked in order.
            range_checks = (
                (time_low, 32,
                 'field 1 out of range (need a 32-bit value)'),
                (time_mid, 16,
                 'field 2 out of range (need a 16-bit value)'),
                (time_hi_version, 16,
                 'field 3 out of range (need a 16-bit value)'),
                (clock_seq_hi_variant, 8,
                 'field 4 out of range (need an 8-bit value)'),
                (clock_seq_low, 8,
                 'field 5 out of range (need an 8-bit value)'),
                (node, 48,
                 'field 6 out of range (need a 48-bit value)'),
            )
            for value, width, complaint in range_checks:
                if not 0 <= value < 1 << width:
                    raise ValueError(complaint)
            clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low
            int = ((time_low << 96) | (time_mid << 80) |
                   (time_hi_version << 64) | (clock_seq << 48) | node)
        if int is not None:
            if not 0 <= int < 1 << 128:
                raise ValueError('int is out of range (need a 128-bit value)')
        if version is not None:
            if not 1 <= version <= 5:
                raise ValueError('illegal version number')
            # Set the variant to RFC 4122.
            int &= ~(0xc000 << 48)
            int |= 0x8000 << 48
            # Set the version number.
            int &= ~(0xf000 << 64)
            int |= version << 76
        # __setattr__ is blocked, so write straight into the instance dict.
        self.__dict__['int'] = int

    def __eq__(self, other):
        if not isinstance(other, UUID):
            return NotImplemented
        return self.int == other.int

    # Q. What's the value of being able to sort UUIDs?
    # A. Use them as keys in a B-Tree or similar mapping.

    def __lt__(self, other):
        if not isinstance(other, UUID):
            return NotImplemented
        return self.int < other.int

    def __gt__(self, other):
        if not isinstance(other, UUID):
            return NotImplemented
        return self.int > other.int

    def __le__(self, other):
        if not isinstance(other, UUID):
            return NotImplemented
        return self.int <= other.int

    def __ge__(self, other):
        if not isinstance(other, UUID):
            return NotImplemented
        return self.int >= other.int

    def __hash__(self):
        return hash(self.int)

    def __int__(self):
        return self.int

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, str(self))

    def __setattr__(self, name, value):
        raise TypeError('UUID objects are immutable')

    def __str__(self):
        digits = '%032x' % self.int
        groups = (digits[:8], digits[8:12], digits[12:16],
                  digits[16:20], digits[20:])
        return '%s-%s-%s-%s-%s' % groups

    @property
    def bytes(self):
        return self.int.to_bytes(16, 'big')

    @property
    def bytes_le(self):
        raw = self.bytes
        # Byte-reverse the 4-, 2- and 2-byte leading fields.
        return raw[3::-1] + raw[5:3:-1] + raw[7:5:-1] + raw[8:]

    @property
    def fields(self):
        return (self.time_low, self.time_mid, self.time_hi_version,
                self.clock_seq_hi_variant, self.clock_seq_low, self.node)

    @property
    def time_low(self):
        return self.int >> 96

    @property
    def time_mid(self):
        return (self.int >> 80) & 0xffff

    @property
    def time_hi_version(self):
        return (self.int >> 64) & 0xffff

    @property
    def clock_seq_hi_variant(self):
        return (self.int >> 56) & 0xff

    @property
    def clock_seq_low(self):
        return (self.int >> 48) & 0xff

    @property
    def time(self):
        high = (self.time_hi_version & 0x0fff) << 48
        middle = self.time_mid << 32
        return high | middle | self.time_low

    @property
    def clock_seq(self):
        high = (self.clock_seq_hi_variant & 0x3f) << 8
        return high | self.clock_seq_low

    @property
    def node(self):
        return self.int & 0xffffffffffff

    @property
    def hex(self):
        return '%032x' % self.int

    @property
    def urn(self):
        return 'urn:uuid:' + str(self)

    @property
    def variant(self):
        # Test the top three bits of clock_seq_hi_variant from the most
        # significant downwards; the first clear bit decides.
        if not self.int & (0x8000 << 48):
            return RESERVED_NCS
        if not self.int & (0x4000 << 48):
            return RFC_4122
        if not self.int & (0x2000 << 48):
            return RESERVED_MICROSOFT
        return RESERVED_FUTURE

    @property
    def version(self):
        # The version bits are only meaningful for RFC 4122 UUIDs.
        if self.variant == RFC_4122:
            return int((self.int >> 76) & 0xf)
|
||||
|
||||
def _popen(command, *args):
|
||||
import os, shutil, subprocess
|
||||
executable = shutil.which(command)
|
||||
if executable is None:
|
||||
path = os.pathsep.join(('/sbin', '/usr/sbin'))
|
||||
executable = shutil.which(command, path=path)
|
||||
if executable is None:
|
||||
return None
|
||||
# LC_ALL=C to ensure English output, stderr=DEVNULL to prevent output
|
||||
# on stderr (Note: we don't have an example where the words we search
|
||||
# for are actually localized, but in theory some system could do so.)
|
||||
env = dict(os.environ)
|
||||
env['LC_ALL'] = 'C'
|
||||
proc = subprocess.Popen((executable,) + args,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.DEVNULL,
|
||||
env=env)
|
||||
return proc
|
||||
|
||||
def _find_mac(command, args, hw_identifiers, get_index):
|
||||
try:
|
||||
proc = _popen(command, *args.split())
|
||||
if not proc:
|
||||
return
|
||||
with proc:
|
||||
for line in proc.stdout:
|
||||
words = line.lower().rstrip().split()
|
||||
for i in range(len(words)):
|
||||
if words[i] in hw_identifiers:
|
||||
try:
|
||||
word = words[get_index(i)]
|
||||
mac = int(word.replace(b':', b''), 16)
|
||||
if mac:
|
||||
return mac
|
||||
except (ValueError, IndexError):
|
||||
# Virtual interfaces, such as those provided by
|
||||
# VPNs, do not have a colon-delimited MAC address
|
||||
# as expected, but a 16-byte HWAddr separated by
|
||||
# dashes. These should be ignored in favor of a
|
||||
# real MAC address
|
||||
pass
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def _ifconfig_getnode():
|
||||
"""Get the hardware address on Unix by running ifconfig."""
|
||||
# This works on Linux ('' or '-a'), Tru64 ('-av'), but not all Unixes.
|
||||
for args in ('', '-a', '-av'):
|
||||
mac = _find_mac('ifconfig', args, [b'hwaddr', b'ether'], lambda i: i+1)
|
||||
if mac:
|
||||
return mac
|
||||
|
||||
def _ip_getnode():
|
||||
"""Get the hardware address on Unix by running ip."""
|
||||
# This works on Linux with iproute2.
|
||||
mac = _find_mac('ip', 'link list', [b'link/ether'], lambda i: i+1)
|
||||
if mac:
|
||||
return mac
|
||||
|
||||
def _arp_getnode():
    """Get the hardware address on Unix by running arp."""
    import os
    import socket

    try:
        hostname = socket.gethostname()
        ip_addr = socket.gethostbyname(hostname)
    except OSError:
        return None

    # Try getting the MAC addr from arp based on our IP address (Solaris).
    # The address is taken from the last word of the matching line.
    needle = os.fsencode(ip_addr)
    return _find_mac('arp', '-an', [needle], lambda i: -1)
|
||||
|
||||
def _lanscan_getnode():
|
||||
"""Get the hardware address on Unix by running lanscan."""
|
||||
# This might work on HP-UX.
|
||||
return _find_mac('lanscan', '-ai', [b'lan0'], lambda i: 0)
|
||||
|
||||
def _netstat_getnode():
|
||||
"""Get the hardware address on Unix by running netstat."""
|
||||
# This might work on AIX, Tru64 UNIX and presumably on IRIX.
|
||||
try:
|
||||
proc = _popen('netstat', '-ia')
|
||||
if not proc:
|
||||
return
|
||||
with proc:
|
||||
words = proc.stdout.readline().rstrip().split()
|
||||
try:
|
||||
i = words.index(b'Address')
|
||||
except ValueError:
|
||||
return
|
||||
for line in proc.stdout:
|
||||
try:
|
||||
words = line.rstrip().split()
|
||||
word = words[i]
|
||||
if len(word) == 17 and word.count(b':') == 5:
|
||||
mac = int(word.replace(b':', b''), 16)
|
||||
if mac:
|
||||
return mac
|
||||
except (ValueError, IndexError):
|
||||
pass
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def _ipconfig_getnode():
    """Get the hardware address on Windows by running ipconfig.exe.

    Runs ``ipconfig /all`` from each candidate directory in turn and
    returns the first value of the exact form ``xx-xx-xx-xx-xx-xx`` as
    an integer.  Returns None when no such value is found.
    """
    import os, re
    dirs = ['', r'c:\windows\system32', r'c:\winnt\system32']
    try:
        import ctypes
        buffer = ctypes.create_string_buffer(300)
        ctypes.windll.kernel32.GetSystemDirectoryA(buffer, 300)
        dirs.insert(0, buffer.value.decode('mbcs'))
    except Exception:
        # Best effort only: without ctypes/windll we fall back to the
        # hard-coded directories above.
        pass
    # The whole value must be six dash-separated octets.  A prefix match
    # would also accept longer addresses (e.g. 8-byte tunnel adapter
    # addresses) and yield a node that does not fit in 48 bits.
    mac_pattern = re.compile('(?:[0-9a-f][0-9a-f]-){5}[0-9a-f][0-9a-f]')
    for directory in dirs:
        try:
            pipe = os.popen(os.path.join(directory, 'ipconfig') + ' /all')
        except OSError:
            continue
        with pipe:
            for line in pipe:
                value = line.split(':')[-1].strip().lower()
                if mac_pattern.fullmatch(value):
                    return int(value.replace('-', ''), 16)
    return None
|
||||
|
||||
def _netbios_getnode():
    """Get the hardware address on Windows using NetBIOS calls.
    See http://support.microsoft.com/kb/118623 for details."""
    import win32wnet, netbios

    # Enumerate the LAN adapters.
    ncb = netbios.NCB()
    ncb.Command = netbios.NCBENUM
    ncb.Buffer = adapters = netbios.LANA_ENUM()
    adapters._pack()
    if win32wnet.Netbios(ncb) != 0:
        return None
    adapters._unpack()

    for index in range(adapters.length):
        # Reset the adapter before querying it.
        ncb.Reset()
        ncb.Command = netbios.NCBRESET
        ncb.Lana_num = ord(adapters.lana[index])
        if win32wnet.Netbios(ncb) != 0:
            continue

        # Ask for the adapter status, which carries the address.
        ncb.Reset()
        ncb.Command = netbios.NCBASTAT
        ncb.Lana_num = ord(adapters.lana[index])
        ncb.Callname = '*'.ljust(16)
        ncb.Buffer = status = netbios.ADAPTER_STATUS()
        if win32wnet.Netbios(ncb) != 0:
            continue
        status._unpack()

        address = status.adapter_address[:6]
        if len(address) == 6:
            return int.from_bytes(address, 'big')
    return None
|
||||
|
||||
# Thanks to Thomas Heller for ctypes and for his help with its use here.
|
||||
|
||||
# If ctypes is available, use it to find system routines for UUID generation.
# XXX This makes the module non-thread-safe!
# Both names stay None unless a matching system routine is found below.
_uuid_generate_time = _UuidCreate = None
try:
    import ctypes, ctypes.util
    import sys

    # The uuid_generate_* routines are provided by libuuid on at least
    # Linux and FreeBSD, and provided by libc on Mac OS X.
    _libnames = ['uuid']
    if not sys.platform.startswith('win'):
        _libnames.append('c')
    for libname in _libnames:
        try:
            lib = ctypes.CDLL(ctypes.util.find_library(libname))
        except Exception:
            continue
        # Take the first library that exports uuid_generate_time.
        if hasattr(lib, 'uuid_generate_time'):
            _uuid_generate_time = lib.uuid_generate_time
            break
    del _libnames

    # The uuid_generate_* functions are broken on MacOS X 10.5, as noted
    # in issue #8621 the function generates the same sequence of values
    # in the parent process and all children created using fork (unless
    # those children use exec as well).
    #
    # Assume that the uuid_generate functions are broken from 10.5 onward,
    # the test can be adjusted when a later version is fixed.
    if sys.platform == 'darwin':
        if int(os.uname().release.split('.')[0]) >= 9:
            _uuid_generate_time = None

    # On Windows prior to 2000, UuidCreate gives a UUID containing the
    # hardware address.  On Windows 2000 and later, UuidCreate makes a
    # random UUID and UuidCreateSequential gives a UUID containing the
    # hardware address.  These routines are provided by the RPC runtime.
    # NOTE:  at least on Tim's WinXP Pro SP2 desktop box, while the last
    # 6 bytes returned by UuidCreateSequential are fixed, they don't appear
    # to bear any relationship to the MAC address of any network device
    # on the box.
    try:
        lib = ctypes.windll.rpcrt4
    except:
        lib = None
    # Prefer UuidCreateSequential; fall back to UuidCreate, then None.
    _UuidCreate = getattr(lib, 'UuidCreateSequential',
                          getattr(lib, 'UuidCreate', None))
except:
    # Any failure above (including a missing ctypes) leaves whatever was
    # assigned so far in place; the pure-Python fallbacks are used instead.
    pass
|
||||
|
||||
def _unixdll_getnode():
|
||||
"""Get the hardware address on Unix using ctypes."""
|
||||
_buffer = ctypes.create_string_buffer(16)
|
||||
_uuid_generate_time(_buffer)
|
||||
return UUID(bytes=bytes_(_buffer.raw)).node
|
||||
|
||||
def _windll_getnode():
|
||||
"""Get the hardware address on Windows using ctypes."""
|
||||
_buffer = ctypes.create_string_buffer(16)
|
||||
if _UuidCreate(_buffer) == 0:
|
||||
return UUID(bytes=bytes_(_buffer.raw)).node
|
||||
|
||||
def _random_getnode():
|
||||
"""Get a random node ID, with eighth bit set as suggested by RFC 4122."""
|
||||
import random
|
||||
return random.getrandbits(48) | 0x010000000000
|
||||
|
||||
_node = None


def getnode():
    """Get the hardware address as a 48-bit positive integer.

    The first time this runs, it may launch a separate program, which could
    be quite slow.  If all attempts to obtain the hardware address fail, we
    choose a random 48-bit number with its eighth bit set to 1 as recommended
    in RFC 4122.

    The result is cached in the module-level ``_node`` and returned
    directly on later calls.
    """
    global _node
    if _node is not None:
        return _node

    import sys
    if sys.platform == 'win32':
        getters = [_windll_getnode, _netbios_getnode, _ipconfig_getnode]
    else:
        getters = [_unixdll_getnode, _ifconfig_getnode, _ip_getnode,
                   _arp_getnode, _lanscan_getnode, _netstat_getnode]

    for getter in getters + [_random_getnode]:
        try:
            candidate = getter()
        except Exception:
            # Each getter is best effort; a failure just means we try the
            # next one.  KeyboardInterrupt/SystemExit are not swallowed.
            continue
        # Some sources can report addresses wider than 48 bits (e.g.
        # 64-bit hardware addresses); those cannot be used as a node and
        # must not be cached.
        if candidate is not None and 0 <= candidate < (1 << 48):
            _node = candidate
            return _node
|
||||
|
||||
_last_timestamp = None


def uuid1(node=None, clock_seq=None):
    """Generate a UUID from a host ID, sequence number, and the current time.
    If 'node' is not given, getnode() is used to obtain the hardware
    address.  If 'clock_seq' is given, it is used as the sequence number;
    otherwise a random 14-bit sequence number is chosen."""
    global _last_timestamp

    # When the system provides a version-1 UUID generator, use it (but don't
    # use UuidCreate here because its UUIDs don't conform to RFC 4122).
    if _uuid_generate_time and node is clock_seq is None:
        _buffer = ctypes.create_string_buffer(16)
        _uuid_generate_time(_buffer)
        return UUID(bytes=bytes_(_buffer.raw))

    import time
    nanoseconds = int(time.time() * 1e9)
    # 0x01b21dd213814000 is the number of 100-ns intervals between the
    # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
    # Floor division keeps this in exact integer arithmetic; true division
    # would round-trip through a float that cannot represent values of
    # this magnitude exactly.
    timestamp = nanoseconds // 100 + 0x01b21dd213814000
    # Never hand out a timestamp that is not strictly greater than the
    # previous one from this process.
    if _last_timestamp is not None and timestamp <= _last_timestamp:
        timestamp = _last_timestamp + 1
    _last_timestamp = timestamp
    if clock_seq is None:
        import random
        clock_seq = random.getrandbits(14)  # instead of stable storage
    time_low = timestamp & 0xffffffff
    time_mid = (timestamp >> 32) & 0xffff
    time_hi_version = (timestamp >> 48) & 0x0fff
    clock_seq_low = clock_seq & 0xff
    clock_seq_hi_variant = (clock_seq >> 8) & 0x3f
    if node is None:
        node = getnode()
    return UUID(fields=(time_low, time_mid, time_hi_version,
                        clock_seq_hi_variant, clock_seq_low, node), version=1)
|
||||
|
||||
def uuid3(namespace, name):
    """Generate a UUID from the MD5 hash of a namespace UUID and a name."""
    from hashlib import md5

    # Hash the namespace's raw bytes followed by the UTF-8 encoded name.
    payload = namespace.bytes + bytes(name, "utf-8")
    digest = md5(payload).digest()
    return UUID(bytes=digest[:16], version=3)
|
||||
|
||||
def uuid4():
    """Generate a random UUID."""
    # 16 bytes from the operating system's random source.
    entropy = os.urandom(16)
    return UUID(bytes=entropy, version=4)
|
||||
|
||||
def uuid5(namespace, name):
    """Generate a UUID from the SHA-1 hash of a namespace UUID and a name."""
    from hashlib import sha1

    # Hash the namespace's raw bytes followed by the UTF-8 encoded name;
    # only the first 16 bytes of the 20-byte digest are used.
    payload = namespace.bytes + bytes(name, "utf-8")
    digest = sha1(payload).digest()
    return UUID(bytes=digest[:16], version=5)
|
||||
|
||||
# The following standard UUIDs are for use with uuid3() or uuid5().
# They are passed as the 'namespace' argument; the four values differ
# only in the last digit of the first group.

NAMESPACE_DNS = UUID('6ba7b810-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_URL = UUID('6ba7b811-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_OID = UUID('6ba7b812-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_X500 = UUID('6ba7b814-9dad-11d1-80b4-00c04fd430c8')
|
||||
@@ -85,6 +85,8 @@ impl ByteInnerNewOptions {
|
||||
vm.new_type_error("string argument without an encoding".to_string())
|
||||
);
|
||||
}
|
||||
i @ PyBytes => Ok(i.get_value().to_vec()),
|
||||
j @ PyByteArray => Ok(j.inner.borrow().elements.to_vec()),
|
||||
obj => {
|
||||
// TODO: only support this method in the bytes() constructor
|
||||
if let Some(bytes_method) = vm.get_method(obj.clone(), "__bytes__") {
|
||||
|
||||
@@ -1183,6 +1183,22 @@ pub trait PyClassImpl: PyClassDef {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: find a better place to put this impl
|
||||
impl TryFromObject for std::time::Duration {
|
||||
fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<Self> {
|
||||
use std::time::Duration;
|
||||
u64::try_from_object(vm, obj.clone())
|
||||
.map(Duration::from_secs)
|
||||
.or_else(|_| f64::try_from_object(vm, obj.clone()).map(Duration::from_secs_f64))
|
||||
.map_err(|_| {
|
||||
vm.new_type_error(format!(
|
||||
"expected an int or float for duration, got {}",
|
||||
obj.class()
|
||||
))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -288,6 +288,13 @@ impl PyBytesIORef {
|
||||
}
|
||||
}
|
||||
|
||||
/// Shorten the underlying byte buffer to `size` bytes.
///
/// When `size` is omitted or `None`, the current stream position is used.
fn truncate(self, size: OptionalOption<usize>, vm: &VirtualMachine) -> PyResult<()> {
    let mut buffer = self.buffer(vm)?;
    let new_len = match size.flat_option() {
        Some(requested) => requested,
        None => buffer.tell() as usize,
    };
    buffer.cursor.get_mut().truncate(new_len);
    Ok(())
}
|
||||
|
||||
/// Report whether the stream has been closed, i.e. its buffer slot is empty.
fn closed(self, _vm: &VirtualMachine) -> bool {
    let slot = self.buffer.borrow();
    slot.is_none()
}
|
||||
@@ -1027,6 +1034,7 @@ pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
|
||||
"getvalue" => ctx.new_rustfunc(PyBytesIORef::getvalue),
|
||||
"tell" => ctx.new_rustfunc(PyBytesIORef::tell),
|
||||
"readline" => ctx.new_rustfunc(PyBytesIORef::readline),
|
||||
"truncate" => ctx.new_rustfunc(PyBytesIORef::truncate),
|
||||
"closed" => ctx.new_property(PyBytesIORef::closed),
|
||||
"close" => ctx.new_rustfunc(PyBytesIORef::close),
|
||||
});
|
||||
|
||||
@@ -62,7 +62,7 @@ mod c {
|
||||
pub use winapi::shared::ws2def::*;
|
||||
pub use winapi::um::winsock2::{
|
||||
SD_BOTH as SHUT_RDWR, SD_RECEIVE as SHUT_RD, SD_SEND as SHUT_WR, SOCK_DGRAM, SOCK_RAW,
|
||||
SOCK_RDM, SOCK_STREAM, *,
|
||||
SOCK_RDM, SOCK_STREAM, SOL_SOCKET, SO_BROADCAST, SO_REUSEADDR, *,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -179,10 +179,12 @@ impl PySocket {
|
||||
#[pymethod]
|
||||
fn recv(&self, bufsize: usize, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
|
||||
let mut buffer = vec![0u8; bufsize];
|
||||
match self.sock.borrow_mut().read_exact(&mut buffer) {
|
||||
Ok(()) => Ok(buffer),
|
||||
Err(err) => Err(convert_sock_error(vm, err)),
|
||||
}
|
||||
let n = self
|
||||
.sock()
|
||||
.recv(&mut buffer)
|
||||
.map_err(|err| convert_sock_error(vm, err))?;
|
||||
buffer.truncate(n);
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
#[pymethod]
|
||||
@@ -196,10 +198,12 @@ impl PySocket {
|
||||
#[pymethod]
|
||||
fn recvfrom(&self, bufsize: usize, vm: &VirtualMachine) -> PyResult<(Vec<u8>, AddrTuple)> {
|
||||
let mut buffer = vec![0u8; bufsize];
|
||||
match self.sock().recv_from(&mut buffer) {
|
||||
Ok((_, addr)) => Ok((buffer, get_addr_tuple(addr))),
|
||||
Err(err) => Err(convert_sock_error(vm, err)),
|
||||
}
|
||||
let (n, addr) = self
|
||||
.sock()
|
||||
.recv_from(&mut buffer)
|
||||
.map_err(|err| convert_sock_error(vm, err))?;
|
||||
buffer.truncate(n);
|
||||
Ok((buffer, get_addr_tuple(addr)))
|
||||
}
|
||||
|
||||
#[pymethod]
|
||||
@@ -276,16 +280,68 @@ impl PySocket {
|
||||
}
|
||||
|
||||
#[pymethod]
|
||||
fn settimeout(&self, timeout: Option<f64>, vm: &VirtualMachine) -> PyResult<()> {
|
||||
fn settimeout(&self, timeout: Option<Duration>, vm: &VirtualMachine) -> PyResult<()> {
|
||||
// timeout is None: blocking, no timeout
|
||||
// timeout is 0: non-blocking, no timeout
|
||||
// otherwise: timeout is timeout, don't change blocking
|
||||
let (block, timeout) = match timeout {
|
||||
None => (Some(true), None),
|
||||
Some(d) if d == Duration::from_secs(0) => (Some(false), None),
|
||||
Some(d) => (None, Some(d)),
|
||||
};
|
||||
self.sock()
|
||||
.set_read_timeout(timeout.map(Duration::from_secs_f64))
|
||||
.set_read_timeout(timeout)
|
||||
.map_err(|err| convert_sock_error(vm, err))?;
|
||||
self.sock()
|
||||
.set_write_timeout(timeout.map(Duration::from_secs_f64))
|
||||
.set_write_timeout(timeout)
|
||||
.map_err(|err| convert_sock_error(vm, err))?;
|
||||
if let Some(blocking) = block {
|
||||
self.sock()
|
||||
.set_nonblocking(!blocking)
|
||||
.map_err(|err| convert_sock_error(vm, err))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Set a socket option via the platform `setsockopt` call.
///
/// Exactly one of `value` and `optlen` must be supplied:
/// - a bytes-like `value` is passed as the option buffer with its own length;
/// - an integer `value` is passed as a pointer to an `i32`;
/// - `value` of `None` together with `optlen` passes a null buffer of that length.
///
/// Any other combination raises `TypeError`. A negative return value from
/// the underlying call is converted from the last OS error.
#[pymethod]
fn setsockopt(
    &self,
    level: i32,
    name: i32,
    value: Option<Either<PyBytesLike, i32>>,
    optlen: OptionalArg<u32>,
    vm: &VirtualMachine,
) -> PyResult<()> {
    let fd = sock_fileno(&self.sock()) as _;
    let ret = match (value, optlen) {
        // Bytes-like value: hand the raw buffer and its length to the OS.
        (Some(Either::A(b)), OptionalArg::Missing) => b.with_ref(|b| unsafe {
            c::setsockopt(fd, level, name, b.as_ptr() as *const _, b.len() as _)
        }),
        // Integer value: pass a pointer to the i32 and its size.
        (Some(Either::B(ref val)), OptionalArg::Missing) => unsafe {
            c::setsockopt(
                fd,
                level,
                name,
                val as *const i32 as *const _,
                std::mem::size_of::<i32>() as _,
            )
        },
        // No value, explicit length: null buffer with the given length.
        (None, OptionalArg::Present(optlen)) => unsafe {
            c::setsockopt(fd, level, name, std::ptr::null(), optlen as _)
        },
        _ => {
            return Err(
                vm.new_type_error("expected the value arg xor the optlen arg".to_string())
            );
        }
    };
    if ret < 0 {
        Err(convert_sock_error(vm, io::Error::last_os_error()))
    } else {
        Ok(())
    }
}
|
||||
|
||||
#[pymethod]
|
||||
fn shutdown(&self, how: i32, vm: &VirtualMachine) -> PyResult<()> {
|
||||
let how = match how {
|
||||
@@ -570,6 +626,10 @@ pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
|
||||
"IPPROTO_IPIP" => ctx.new_int(c::IPPROTO_IP),
|
||||
"IPPROTO_IPV6" => ctx.new_int(c::IPPROTO_IPV6),
|
||||
"IPPROTO_NONE" => ctx.new_int(c::IPPROTO_NONE),
|
||||
"SOL_SOCKET" => ctx.new_int(c::SOL_SOCKET),
|
||||
"SO_REUSEADDR" => ctx.new_int(c::SO_REUSEADDR),
|
||||
"TCP_NODELAY" => ctx.new_int(c::TCP_NODELAY),
|
||||
"SO_BROADCAST" => ctx.new_int(c::SO_BROADCAST),
|
||||
"socket" => PySocket::make_class(ctx),
|
||||
"inet_aton" => ctx.new_rustfunc(socket_inet_aton),
|
||||
"inet_ntoa" => ctx.new_rustfunc(socket_inet_ntoa),
|
||||
|
||||
@@ -15,9 +15,8 @@ use crate::pyobject::{Either, PyClassImpl, PyObjectRef, PyResult, TryFromObject}
|
||||
use crate::vm::VirtualMachine;
|
||||
|
||||
#[cfg(unix)]
|
||||
fn time_sleep(seconds: f64, vm: &VirtualMachine) -> PyResult<()> {
|
||||
// this is basically std::thread::sleep, but that catches interrupts and we don't want to
|
||||
let dur = Duration::from_secs_f64(seconds);
|
||||
fn time_sleep(dur: Duration, vm: &VirtualMachine) -> PyResult<()> {
|
||||
// this is basically std::thread::sleep, but that catches interrupts and we don't want to;
|
||||
|
||||
let mut ts = libc::timespec {
|
||||
tv_sec: std::cmp::min(libc::time_t::max_value() as u64, dur.as_secs()) as libc::time_t,
|
||||
@@ -34,8 +33,8 @@ fn time_sleep(seconds: f64, vm: &VirtualMachine) -> PyResult<()> {
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
fn time_sleep(seconds: f64, _vm: &VirtualMachine) {
|
||||
std::thread::sleep(Duration::from_secs_f64(seconds));
|
||||
fn time_sleep(dur: Duration, _vm: &VirtualMachine) {
|
||||
std::thread::sleep(dur);
|
||||
}
|
||||
|
||||
#[cfg(any(not(target_arch = "wasm32"), target_os = "wasi"))]
|
||||
|
||||
Reference in New Issue
Block a user