require "uri"
require "net/http"
require "net/ftp"
require "timeout"
require "zlib"
require "stringio"
require "web/request"
require "web/response"
require "web/escape"
require "web/agent/cookiemanager"
require "web/agent/passwordmanager"
module Web
class Agent
VERSION = "0.1"
# Creates an agent holding a fresh Web::Request preloaded with the default
# request headers. No response, URI, proxy or manager is set yet; @timeout
# starts at 180.
def initialize
  @req = Web::Request.new
  @rsp = nil
  # Default request headers, applied in this order.
  [
    ['User-Agent', "NoraAgent/#{VERSION}"],
    ['Accept', '*/*'],
    ['Accept-Language', 'ja,en,*'],
    ['Accept-Encoding', 'gzip,deflate'],
    ['Accept-Charset', '*']
  ].each {|name, value| @req.header[name] = value }
  @uri = nil
  @timeout = 180
  @proxy_host = @proxy_port = nil
  @ftpproxy_host = @ftpproxy_port = nil
  @cookiemanager = @passwordmanager = nil
end
attr_accessor :uri
attr_accessor :req, :rsp, :proxy_host, :proxy_port, :ftpproxy_host, :ftpproxy_port
attr_accessor :cookiemanager, :passwordmanager
# Picks up proxy settings from the environment.
#
# http_proxy / HTTP_PROXY fill @proxy_host and @proxy_port; ftp_proxy /
# FTP_PROXY fill @ftpproxy_host and @ftpproxy_port. For each pair the
# lower-case variable is consulted first. Variables that are not set leave
# the corresponding attributes untouched.
def setup()
  http_env = ENV['http_proxy'] || ENV['HTTP_PROXY']
  if http_env
    parsed = URI.parse(http_env)
    @proxy_host = parsed.host
    @proxy_port = parsed.port
  end
  ftp_env = ENV['ftp_proxy'] || ENV['FTP_PROXY']
  if ftp_env
    parsed = URI.parse(ftp_env)
    @ftpproxy_host = parsed.host
    @ftpproxy_port = parsed.port
  end
end
# Fetches +uri+ with the GET method.
#
# Stores the target through set_uri, marks the request as 'GET' and then keeps
# dispatching to the scheme-specific fetcher (get_http, get_https or get_ftp)
# until one of them throws :exit; a fetcher that returns normally (for example
# after registering a redirect) is simply called again for the updated @uri.
#
# uri:: the URI to fetch; handed to set_uri.
#
# Raises Timeout::Error when the exchange takes longer than @timeout, and
# RuntimeError when the URI scheme is not http, https or ftp.
def get(uri)
  set_uri(uri)
  @req.method = 'GET'
  # Object#timeout no longer exists in Ruby 3.0+; Timeout.timeout is the
  # supported spelling and behaves the same on older versions.
  Timeout.timeout(@timeout) {
    catch(:exit) {
      loop do
        case @uri.scheme
        when "http"
          get_http
        when "https"
          get_https
        when "ftp"
          get_ftp
        else
          # Without this branch the loop would spin until the timeout fires.
          raise "not supported scheme"
        end
      end
    }
  }
end
# Issues a HEAD request for +uri+.
#
# Stores the target through set_uri, marks the request as 'HEAD' and keeps
# dispatching to head_http / head_https until one of them throws :exit; a
# handler that returns normally is called again for the updated @uri.
#
# uri:: the URI to query; handed to set_uri.
#
# Raises Timeout::Error when the exchange takes longer than @timeout, and
# RuntimeError for ftp URIs ("not supported method") or any scheme other than
# http/https/ftp ("not supported scheme").
def head(uri)
  set_uri(uri)
  @req.method = 'HEAD'
  # Object#timeout no longer exists in Ruby 3.0+; Timeout.timeout is the
  # supported spelling and behaves the same on older versions.
  Timeout.timeout(@timeout) {
    catch(:exit) {
      loop do
        case @uri.scheme
        when "http"
          head_http
        when "https"
          head_https
        when "ftp"
          raise "not supported method"
        else
          # Without this branch the loop would spin until the timeout fires.
          raise "not supported scheme"
        end
      end
    }
  }
end
# Sends the request to +uri+ with the POST method.
#
# The first exchange goes through post_http / post_https. If that handler
# returns instead of throwing :exit (it registered a redirect), the form data
# is cleared and the new location is fetched with get_http / get_https until
# one of them throws :exit.
#
# uri:: the URI to post to; handed to set_uri.
#
# Raises Timeout::Error when the exchange takes longer than @timeout, and
# RuntimeError for ftp URIs ("not supported method") or any scheme other than
# http/https/ftp ("not supported scheme").
def post(uri)
  set_uri(uri)
  @req.method = 'POST'
  # Object#timeout no longer exists in Ruby 3.0+; Timeout.timeout is the
  # supported spelling and behaves the same on older versions.
  Timeout.timeout(@timeout) {
    catch(:exit) {
      case @uri.scheme
      when "http"
        post_http
      when "https"
        post_https
      when "ftp"
        raise "not supported method"
      else
        # Falling through here would enter the GET loop below and spin.
        raise "not supported scheme"
      end
      # post after redirect
      @req.form.clear
      loop do
        case @uri.scheme
        when "http"
          get_http
        when "https"
          get_https
        when "ftp"
          raise "not supported method"
        else
          raise "not supported scheme"
        end
      end
    }
  }
end
# Performs one HTTP GET for the current request and stores the reply in @rsp.
#
# Throws :exit when the reply is final. Returns normally when a redirect was
# registered through redirect_uri (Location header, Refresh header or an HTML
# meta refresh), so that the caller's loop issues the next request.
def get_http
  path = @req.script_name
  @req.query_encode
  @req.form_encode
  if @req.query_string && !@req.query_string.empty?
    path = path + "?" + @req.query_string
  end
  Net::HTTP.start(@req.server_name, @req.server_port.to_i, @proxy_host, @proxy_port) {|http|
    # Password
    @passwordmanager.get(self) if @passwordmanager
    # Cookie
    @cookiemanager.get(@req) if @cookiemanager
    hash = @req.header.to_hash
    if values=@req.header['cookie',nil]
      # Collapse all cookie values into a single Cookie header.
      hash.delete 'cookie'
      hash['cookie'] = values.join('; ')
    end
    # Send the merged hash; previously it was built and then ignored.
    response = http.get(path, hash)
    @rsp = Web::Response.new
    # Copies the status code and every header of the reply into @rsp.
    copy_head = lambda {
      @rsp.status = response.code.to_s
      response.each {|key,value|
        @rsp.header.add key,value
      }
    }
    # Hands the reply back to the caller unchanged and ends the request loop.
    finish = lambda {
      copy_head.call
      @rsp.body = response.body
      throw :exit
    }
    case response
    when Net::HTTPSuccess
      rtry = false
      copy_head.call
      # Cookie
      if set_cookies = @rsp.header['set-cookie',nil]
        set_cookies.each {|set_cookie|
          @rsp.cookies.parse(set_cookie, @req)
        }
        @cookiemanager.set(@req, @rsp) if @cookiemanager
      end
      @rsp.body = response.body
      # gzip,deflate (a reply may carry no body at all)
      if @rsp.body
        case @rsp.header['content-encoding']
        when /gzip/i
          io = StringIO.new(@rsp.body)
          @rsp.body = Zlib::GzipReader.new(io).read()
        when /deflate/i
          @rsp.body = Zlib::Inflate.inflate(@rsp.body)
        end
      end
      # Redirect detection: Refresh header.
      # NOTE(review): only a single-digit delay with no whitespace matches;
      # confirm whether that restriction is intentional.
      if refresh=@rsp.header['Refresh']
        if refresh=~/\A\d;URL="?([^"]+)"?/ #"
          redirect_uri($1)
          rtry = true
        end
      end
      # Redirect detection: <meta http-equiv="refresh"> inside an HTML body.
      # NOTE(review): both patterns below were unreadable in the source (the
      # markup between angle brackets was lost, leaving an invalid regexp);
      # they are reconstructions -- confirm against the upstream version.
      if @rsp.body && @rsp.header['content-type']=~/text\/html/
        # Work on a copy so the body returned to the caller keeps its comments.
        html = @rsp.body.gsub(/<!--.*?-->/m,'')
        if html=~%r{<meta\s[^>]*http-equiv\s*=\s*["']?refresh["']?[^>]*content\s*=\s*["']?\d\s*;\s*url\s*=\s*["']?([^"'>\s]+)}i
          redirect_uri($1)
          rtry = true
        end
      end
      unless rtry
        throw :exit
      end
    when Net::HTTPNotModified
      # Must be tested before HTTPRedirection: 304 is a subclass of it.
      finish.call
    when Net::HTTPRedirection
      if location = response['location']
        redirect_uri(location)
      else
        # Nothing to follow; report the reply instead of failing on nil.
        finish.call
      end
    when Net::HTTPUnauthorized
      copy_head.call
      @rsp.body = response.body
      @passwordmanager.get(self) if @passwordmanager
      throw :exit
    else
      finish.call
    end
  } # HTTP
end
# Performs one HTTP HEAD for the current request and stores the reply in @rsp.
#
# Throws :exit when the reply is final. Returns normally when a redirect was
# registered through redirect_uri (Location or Refresh header), so that the
# caller's loop issues the next request.
def head_http
  path = @req.script_name
  @req.query_encode
  if @req.query_string && !@req.query_string.empty?
    path = path + "?" + @req.query_string
  end
  # Block form: the connection is finished on every way out of the block,
  # including throw :exit and exceptions.
  Net::HTTP.start(@req.server_name, @req.server_port.to_i, @proxy_host, @proxy_port) {|http|
    # Password
    @passwordmanager.get(self) if @passwordmanager
    # Cookie
    @cookiemanager.get(@req) if @cookiemanager
    hash = @req.header.to_hash
    if values=@req.header['cookie',nil]
      # Collapse all cookie values into a single Cookie header.
      hash.delete 'cookie'
      hash['cookie'] = values.join('; ')
    end
    #
    response = http.head(path, hash)
    @rsp = Web::Response.new
    # Copy the headers exactly once. The branches below used to repeat this
    # copy, which (assuming header.add appends) recorded every header twice.
    response.each {|key,value|
      @rsp.header.add key,value
    }
    # Cookie
    if set_cookies = @rsp.header['set-cookie',nil]
      set_cookies.each {|set_cookie|
        @rsp.cookies.parse(set_cookie, @req)
      }
      @cookiemanager.set(@req, @rsp) if @cookiemanager
    end
    # Hands the reply back to the caller and ends the request loop.
    finish = lambda {
      @rsp.status = response.code.to_s
      @rsp.body = response.body
      throw :exit
    }
    case response
    when Net::HTTPSuccess
      rtry = false
      @rsp.status = response.code.to_s
      @rsp.body = response.body
      # Redirect detection: Refresh header.
      # NOTE(review): only a single-digit delay with no whitespace matches;
      # confirm whether that restriction is intentional.
      if refresh=@rsp.header['Refresh']
        if refresh=~/\A\d;URL="?([^"]+)"?/ #"
          redirect_uri($1)
          rtry = true
        end
      end
      throw :exit unless rtry
    when Net::HTTPRedirection
      if location = response['location']
        redirect_uri(location)
      else
        # e.g. 304 Not Modified: nothing to follow, report the reply as-is.
        finish.call
      end
    else
      finish.call
    end
  } # HTTP
end
# Performs one HTTP POST of @req.body for the current request and stores the
# reply in @rsp.
#
# Throws :exit when the reply is final. Returns normally when a redirect was
# registered through redirect_uri (Location header, Refresh header or an HTML
# meta refresh), so that the caller can fetch the new location.
def post_http
  path = @req.script_name
  @req.query_encode
  @req.form_encode
  if @req.query_string && !@req.query_string.empty?
    path = path + "?" + @req.query_string
  end
  # Block form: the connection is finished on every way out of the block,
  # including throw :exit and exceptions.
  Net::HTTP.start(@req.server_name, @req.server_port.to_i, @proxy_host, @proxy_port) {|http|
    # Password
    @passwordmanager.get(self) if @passwordmanager
    # Cookie
    @cookiemanager.get(@req) if @cookiemanager
    hash = @req.header.to_hash
    if values=@req.header['cookie',nil]
      # Collapse all cookie values into a single Cookie header.
      hash.delete 'cookie'
      hash['cookie'] = values.join('; ')
    end
    #
    response = http.post(path, @req.body, hash)
    @rsp = Web::Response.new
    # Copy the headers exactly once. The branches below used to repeat this
    # copy, which (assuming header.add appends) recorded every header twice.
    response.each {|key,value|
      @rsp.header.add key,value
    }
    # Cookie
    if set_cookies = @rsp.header['set-cookie',nil]
      set_cookies.each {|set_cookie|
        @rsp.cookies.parse(set_cookie, @req)
      }
      @cookiemanager.set(@req, @rsp) if @cookiemanager
    end
    # Hands the reply back to the caller and ends the request loop.
    finish = lambda {
      @rsp.status = response.code.to_s
      @rsp.body = response.body
      throw :exit
    }
    case response
    when Net::HTTPSuccess
      rtry = false
      @rsp.status = response.code.to_s
      @rsp.body = response.body
      # gzip,deflate (a reply may carry no body at all)
      if @rsp.body
        case @rsp.header['content-encoding']
        when /gzip/i
          io = StringIO.new(@rsp.body)
          @rsp.body = Zlib::GzipReader.new(io).read()
        when /deflate/i
          @rsp.body = Zlib::Inflate.inflate(@rsp.body)
        end
      end
      # Redirect detection: Refresh header.
      # NOTE(review): only a single-digit delay with no whitespace matches;
      # confirm whether that restriction is intentional.
      if refresh=@rsp.header['Refresh']
        if refresh=~/\A\d;URL="?([^"]+)"?/ #"
          redirect_uri($1)
          rtry = true
        end
      end
      # Redirect detection: <meta http-equiv="refresh"> inside an HTML body.
      # NOTE(review): both patterns below were unreadable in the source (the
      # markup between angle brackets was lost, leaving an invalid regexp);
      # they are reconstructions -- confirm against the upstream version.
      if @rsp.body && @rsp.header['content-type']=~/text\/html/
        # Work on a copy so the body returned to the caller keeps its comments.
        html = @rsp.body.gsub(/<!--.*?-->/m,'')
        if html=~%r{<meta\s[^>]*http-equiv\s*=\s*["']?refresh["']?[^>]*content\s*=\s*["']?\d\s*;\s*url\s*=\s*["']?([^"'>\s]+)}i
          redirect_uri($1)
          rtry = true
        end
      end
      throw :exit unless rtry
    when Net::HTTPRedirection
      if location = response['location']
        redirect_uri(location)
      else
        # e.g. 304 Not Modified: nothing to follow, report the reply as-is.
        finish.call
      end
    else
      finish.call
    end
  } # HTTP
end
# Resolves +uri+ (absolute or relative) against the current @uri, stores the
# result in @uri and re-initialises the request for it through set_uri.
def redirect_uri(uri)
  target = @uri.merge(uri)
  @uri = target
  set_uri(target.to_s)
end
# Parses and normalizes +uri+, stores it in @uri and primes @req for it.
#
# For http/https URIs this sets the server name, port, script name and query
# string, rebuilds @req.query from the query string (keys and values are
# unescaped with Web::unescape) and sets the Host header. For ftp URIs only
# the server name, port and script name are set. Other schemes leave @req
# untouched.
#
# uri:: anything whose to_s yields a URI string.
def set_uri(uri)
  @uri = URI.parse(uri.to_s).normalize
  case @uri.scheme
  when /^http/
    @req.server_name = @uri.host
    @req.server_port = @uri.port
    @req.script_name = @uri.path
    @req.query_string = @uri.query
    @req.query.clear
    unless @uri.query.nil?
      # Pairs are separated by & or ;, and a key may come without a value.
      @uri.query.scan(/([^=&;]+)(?:=([^&;]*))?[&;]?/n) {|key,value|
        key = Web::unescape(key)
        value = Web::unescape(value) if value
        @req.query.add key,value
      }
    end
    host = @uri.host
    # Name the port only when it differs from the scheme's default (80 for
    # http, 443 for https); comparing against 80 alone tagged every https
    # request with ":443".
    host = host + ":" + @uri.port.to_s if @uri.port != @uri.default_port
    @req.header['Host'] = host
  when /^ftp/
    @req.server_name = @uri.host
    @req.server_port = @uri.port
    @req.script_name = @uri.path
  end
end
# Downloads the file named by @uri over FTP into a fresh @rsp.
#
# Always leaves through throw :exit once a connection attempt has been made.
# On failure @rsp.status is set to STATUS_ServiceUnavailable (cannot connect),
# STATUS_Unauthorized (login refused) or STATUS_NotFound (RETR refused); on
# success the file content is in @rsp.body and the status is left as
# Web::Response.new produced it.
#
# Raises RuntimeError when an FTP proxy is configured (not supported).
def get_ftp
  @rsp = Web::Response.new
  @rsp.body = ''.dup
  raise "not supported ftp_proxy" unless @ftpproxy_host==nil
  ftp = Net::FTP.new
  begin
    # Honour a non-default port given in the URI.
    ftp.connect(@uri.host, @uri.port)
  rescue
    @rsp.status = Web::Response::STATUS_ServiceUnavailable
    throw :exit
  end
  begin
    begin
      ftp.login(@uri.user || 'anonymous', @uri.password)
    rescue Net::FTPPermError
      # Net::FTP reports a refused login (5xx reply) by raising.
      @rsp.status = Web::Response::STATUS_Unauthorized
      throw :exit
    end
    if ftp.lastresp=~/\A530/
      @rsp.status = Web::Response::STATUS_Unauthorized
      throw :exit
    end
    ftp.passive = true
    begin
      ftp.retrbinary("RETR #{@uri.path}", 1024) {|data|
        @rsp.body << data
      }
    rescue Net::FTPPermError
      @rsp.status = Web::Response::STATUS_NotFound
    end
    throw :exit
  ensure
    # Runs for throw :exit as well as for unexpected errors, so the control
    # connection is never left open.
    ftp.close
  end
end
end # Agent
end # Web