# This is a shell archive.  Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file".  Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
#	index.cgi
#	dayomon.py
#
echo x - index.cgi
sed 's/^X//' >index.cgi << 'END-of-index.cgi'
X#!/usr/bin/env python
X# encoding: utf-8
X# CGI front-end: fetches a page through dayomon.py and prints the
X# filtered HTML; "view=src" serves this program's own source instead.
X
Ximport os
Ximport sys
Ximport cgi
Ximport cgitb; cgitb.enable()
Ximport socket
Ximport urllib2
Ximport dayomon
X
XRedirectPage = '../dayomon.html'
X# Whitespace-separated User-Agent substrings we accept.
XValidAgent = 'KHTML Gecko IE Netscape Opera Sleipnir Lynx w3m'
X
Xdef redirect():
X    # Emit a 302 pointing back at the entry form.  The caller must not
X    # write anything else to stdout after this.
X    print 'Status: 302 Found'
X    print 'Location: %s\n' % RedirectPage
X
Xdef validagent():
X    # True when HTTP_USER_AGENT contains any ValidAgent token,
X    # case-insensitively.  Missing or empty agent -> False.
X    agent = os.environ.get('HTTP_USER_AGENT', '').lower()
X    if not agent:
X        return False
X    match = [ua for ua in ValidAgent.split(' ') if ua.lower() in agent]
X    return bool(match)
X
Xreq = cgi.FieldStorage()
Xuri = req.getvalue('uri', '')
Xview = req.getvalue('view', '')
X
Xif view == 'src':
X    # Serve our own source as plain text.
X    print 'content-type: text/plain; charset=utf-8\n'
X    for fn in 'index.cgi dayomon.py'.split(' '):
X        print ' '.join([('-') * 10, fn])
X        f = open(fn)
X        try:
X            print f.read()
X        finally:
X            f.close()  # BUG FIX: was "f.close" (attribute access; never closed)
X
Xelif uri:
X    # Refuse URIs carrying markup metacharacters.
X    if [spchar for spchar in ('"', '<', '>', "'") if spchar in uri]:
X        redirect()
X        # BUG FIX: previously fell through after redirect() and emitted a
X        # second content-type header plus a body on top of the 302.
X        sys.exit(0)
X
X    print 'content-type: text/html\n'
X
X    if not validagent():
X        print 'invalid user-agent. (allowed: %s)' % ValidAgent
X    else:
X        try:
X            print dayomon.dayomonized_htmlget(uri).encode('utf-8')
X        except (socket.timeout, urllib2.URLError):
X            print 'Operation Timeouted!!'
X
Xelse:
X    redirect()
END-of-index.cgi
echo x - dayomon.py
sed 's/^X//' >dayomon.py << 'END-of-dayomon.py'
X#!/usr/bin/env python
X# encoding: utf-8
X# Fetch a web page, rewrite its links so navigation stays inside this
X# filter, and run the text through the "dayomon" speech substitutions.
X
X__version__ = '1.1 (20080813)'
XFilterURL = 'http://wids.net/lab/dayomon/?uri='
X
Ximport re
Ximport urllib
Ximport urllib2
Ximport urlparse
Xfrom BeautifulSoup import BeautifulSoup
X
X# Literal, order-sensitive replacements, applied first in list order.
X# The sentence-ending rules rewrite their results to end in a temporary
X# '.' marker so the plain '。' rule below cannot match them again; the
X# final rule turns the marker back into '。'.
XMainSwaps = [
X    (u'.', u'。'),
X    (u',', u'、'),
X    (u'る。', u'るんだよもん.'),
X    (u'た。', u'たんだよもん.'),
X    (u'です。', u'だよもん.'),
X    (u'ね。', u'だよもん.'),
X    (u'だ。', u'だよもん.'),
X    (u'。', u'だよもん。'),
X    (u'?', u'だよもん?'),
X    (u'!', u'だよもん!'),
X    (u'.', u'。'),
X]
X# Genuine regular expressions: collapse repeated "だよもん" tails.
XDoubleSwaps = [
X    (u'(だよもん。){2,}', u'だよもん。'),
X    (u'(だよもん?){2,}', u'だよもん?'),
X    (u'(だよもん!){2,}', u'だよもん!'),
X    (u'だよもん!だよもん?', u'だよもん!?'),
X    (u'だよもん?だよもん!', u'だよもん?!'),
X]
X
Xdef pageget(url):
X    # Fetch url and return the raw response body (5 s socket timeout).
X    import socket
X    socket.setdefaulttimeout(5)
X    agent = '/'.join(['DayomonFilter.py', __version__])
X    req = urllib2.Request(url)
X    req.add_header('User-Agent', agent)
X    http = urllib2.urlopen(req)
X    try:
X        res = http.read()
X    finally:
X        http.close()  # BUG FIX: was "http.close" (connection never closed)
X
X    return res
X
Xdef urlconvert(url, soup):
X    # Absolutize relative stylesheet/image/anchor URLs against `url`,
X    # then route absolute http anchors back through this filter.
X    for css in soup.findAll('link', type='text/css', href=True):
X        if not urlparse.urlparse(css['href'])[0]:
X            css['href'] = urlparse.urljoin(url, css['href'])
X
X    for img in soup.findAll('img', src=True):
X        if not urlparse.urlparse(img['src'])[0]:
X            img['src'] = urlparse.urljoin(url, img['src'])
X
X    for element in soup.findAll('a', href=True):
X        if not urlparse.urlparse(element['href'])[0]:
X            element['href'] = urlparse.urljoin(url, element['href'])
X        if urlparse.urlparse(element['href'])[0] == 'http':
X            # BUG FIX: percent-encode the target so its own query string
X            # survives as a single "uri" parameter instead of being split
X            # on '&' by the receiving CGI.  Encode to UTF-8 first because
X            # urllib.quote only accepts byte strings.
X            quoted = urllib.quote(element['href'].encode('utf-8'), safe='')
X            element['href'] = ''.join([FilterURL, quoted])
X
X    return soup
X
Xdef dayomonize(_str):
X    # Apply MainSwaps as literal text, then DoubleSwaps as regexes.
X    for pat, rep in MainSwaps:
X        # BUG FIX: MainSwaps entries are literal strings, but they were
X        # handed to re.sub unescaped -- a bare '.' pattern is a regex
X        # wildcard matching every character.  re.escape keeps them literal.
X        _str = re.sub(re.escape(pat), rep, _str)
X    for pat, rep in DoubleSwaps:
X        _str = re.sub(pat, rep, _str)
X
X    return _str
X
Xdef dayomonized_htmlget(url):
X    # Return the dayomonized unicode HTML of `url`.
X    # May raise socket.timeout / urllib2.URLError; caller handles them.
X    html = pageget(url)
X    soup = urlconvert(url, BeautifulSoup(html))
X
X    return dayomonize(unicode(soup))
END-of-dayomon.py
exit