# This is a shell archive.  Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file".  Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
#	index.cgi
#	dayomon.py
#
echo x - index.cgi
sed 's/^X//' >index.cgi << 'END-of-index.cgi'
X#!/usr/bin/env python
X# encoding: utf-8
X# CGI front-end: fetches a page through dayomon.py and prints the
X# filtered HTML; "view=src" serves this program's own source instead.
X
Ximport os
Ximport sys
Ximport cgi
Ximport cgitb; cgitb.enable()
Ximport socket
Ximport urllib2
Ximport dayomon
X
XRedirectPage = '../dayomon.html'
X# Whitespace-separated User-Agent substrings we accept.
XValidAgent = 'KHTML Gecko IE Netscape Opera Sleipnir Lynx w3m'
X
Xdef redirect():
X    # Emit a 302 pointing back at the entry form.  The caller must not
X    # write anything else to stdout after this.
X    print 'Status: 302 Found'
X    print 'Location: %s\n' % RedirectPage
X
Xdef validagent():
X    # True when HTTP_USER_AGENT contains any ValidAgent token,
X    # case-insensitively.  Missing or empty agent -> False.
X    agent = os.environ.get('HTTP_USER_AGENT', '').lower()
X    if not agent:
X        return False
X    match = [ua for ua in ValidAgent.split(' ') if ua.lower() in agent]
X    return bool(match)
X
Xreq = cgi.FieldStorage()
Xuri = req.getvalue('uri', '')
Xview = req.getvalue('view', '')
X
Xif view == 'src':
X    # Serve our own source as plain text.
X    print 'content-type: text/plain; charset=utf-8\n'
X    for fn in 'index.cgi dayomon.py'.split(' '):
X        print ' '.join([('-') * 10, fn])
X        f = open(fn)
X        try:
X            print f.read()
X        finally:
X            f.close()  # BUG FIX: was "f.close" (attribute access; never closed)
X
Xelif uri:
X    # Refuse URIs carrying markup metacharacters.
X    if [spchar for spchar in ('"', '<', '>', "'") if spchar in uri]:
X        redirect()
X        # BUG FIX: previously fell through after redirect() and emitted a
X        # second content-type header plus a body on top of the 302.
X        sys.exit(0)
X
X    print 'content-type: text/html\n'
X
X    if not validagent():
X        print 'invalid user-agent. (allowed: %s)' % ValidAgent
X    else:
X        try:
X            print dayomon.dayomonized_htmlget(uri).encode('utf-8')
X        except (socket.timeout, urllib2.URLError):
X            print 'Operation Timeouted!!'
X
Xelse:
X    redirect()
END-of-index.cgi
echo x - dayomon.py
sed 's/^X//' >dayomon.py << 'END-of-dayomon.py'
X#!/usr/bin/env python
X# encoding: utf-8
X# Fetch a web page, rewrite its links so navigation stays inside this
X# filter, and run the text through the "dayomon" speech substitutions.
X
X__version__ = '1.1 (20080813)'
XFilterURL = 'http://wids.net/lab/dayomon/?uri='
X
Ximport re
Ximport urllib
Ximport urllib2
Ximport urlparse
Xfrom BeautifulSoup import BeautifulSoup
X
X# Literal, order-sensitive replacements, applied first in list order.
X# The sentence-ending rules rewrite their results to end in a temporary
X# '.' marker so the plain '。' rule below cannot match them again; the
X# final rule turns the marker back into '。'.
XMainSwaps = [
X    (u'.', u'。'),
X    (u',', u'、'),
X    (u'る。', u'るんだよもん.'),
X    (u'た。', u'たんだよもん.'),
X    (u'です。', u'だよもん.'),
X    (u'ね。', u'だよもん.'),
X    (u'だ。', u'だよもん.'),
X    (u'。', u'だよもん。'),
X    (u'?', u'だよもん?'),
X    (u'!', u'だよもん!'),
X    (u'.', u'。'),
X]
X# Genuine regular expressions: collapse repeated "だよもん" tails.
XDoubleSwaps = [
X    (u'(だよもん。){2,}', u'だよもん。'),
X    (u'(だよもん?){2,}', u'だよもん?'),
X    (u'(だよもん!){2,}', u'だよもん!'),
X    (u'だよもん!だよもん?', u'だよもん!?'),
X    (u'だよもん?だよもん!', u'だよもん?!'),
X]
X
Xdef pageget(url):
X    # Fetch url and return the raw response body (5 s socket timeout).
X    import socket
X    socket.setdefaulttimeout(5)
X    agent = '/'.join(['DayomonFilter.py', __version__])
X    req = urllib2.Request(url)
X    req.add_header('User-Agent', agent)
X    http = urllib2.urlopen(req)
X    try:
X        res = http.read()
X    finally:
X        http.close()  # BUG FIX: was "http.close" (connection never closed)
X
X    return res
X
Xdef urlconvert(url, soup):
X    # Absolutize relative stylesheet/image/anchor URLs against `url`,
X    # then route absolute http anchors back through this filter.
X    for css in soup.findAll('link', type='text/css', href=True):
X        if not urlparse.urlparse(css['href'])[0]:
X            css['href'] = urlparse.urljoin(url, css['href'])
X
X    for img in soup.findAll('img', src=True):
X        if not urlparse.urlparse(img['src'])[0]:
X            img['src'] = urlparse.urljoin(url, img['src'])
X
X    for element in soup.findAll('a', href=True):
X        if not urlparse.urlparse(element['href'])[0]:
X            element['href'] = urlparse.urljoin(url, element['href'])
X        if urlparse.urlparse(element['href'])[0] == 'http':
X            # BUG FIX: percent-encode the target so its own query string
X            # survives as a single "uri" parameter instead of being split
X            # on '&' by the receiving CGI.  Encode to UTF-8 first because
X            # urllib.quote only accepts byte strings.
X            quoted = urllib.quote(element['href'].encode('utf-8'), safe='')
X            element['href'] = ''.join([FilterURL, quoted])
X
X    return soup
X
Xdef dayomonize(_str):
X    # Apply MainSwaps as literal text, then DoubleSwaps as regexes.
X    for pat, rep in MainSwaps:
X        # BUG FIX: MainSwaps entries are literal strings, but they were
X        # handed to re.sub unescaped -- a bare '.' pattern is a regex
X        # wildcard matching every character.  re.escape keeps them literal.
X        _str = re.sub(re.escape(pat), rep, _str)
X    for pat, rep in DoubleSwaps:
X        _str = re.sub(pat, rep, _str)
X
X    return _str
X
Xdef dayomonized_htmlget(url):
X    # Return the dayomonized unicode HTML of `url`.
X    # May raise socket.timeout / urllib2.URLError; caller handles them.
X    html = pageget(url)
X    soup = urlconvert(url, BeautifulSoup(html))
X
X    return dayomonize(unicode(soup))
END-of-dayomon.py
exit