require "uri"
require "net/http"
require "net/ftp"
require "timeout"
require "zlib"
require "stringio"
require "web/request"
require "web/response"
require "web/escape"
require "web/agent/cookiemanager"
require "web/agent/passwordmanager"

module Web
  # Small user agent that issues GET/HEAD/POST requests and follows
  # redirects (HTTP 3xx, the "Refresh" header and HTML meta refresh).
  #
  # The current request is kept in +req+ (a Web::Request) and the most
  # recent response in +rsp+ (a Web::Response). Optional cookie and
  # password managers are consulted before every HTTP request when set.
  class Agent
    VERSION = "0.1"

    # Builds the default request headers and clears all proxy/manager
    # settings. The overall per-call timeout defaults to 180 seconds.
    def initialize
      @req = Web::Request.new
      @rsp = nil
      @req.header['User-Agent'] = "NoraAgent/#{VERSION}"
      @req.header['Accept'] = '*/*'
      @req.header['Accept-Language'] = 'ja,en,*'
      @req.header['Accept-Encoding'] = 'gzip,deflate'
      @req.header['Accept-Charset'] = '*'
      @uri = nil
      @timeout = 180
      @proxy_host = nil
      @proxy_port = nil
      @ftpproxy_host = nil
      @ftpproxy_port = nil
      @cookiemanager = nil
      @passwordmanager = nil
    end

    attr_accessor :uri
    attr_accessor :req, :rsp, :proxy_host, :proxy_port, :ftpproxy_host, :ftpproxy_port
    attr_accessor :cookiemanager, :passwordmanager

    # Reads proxy settings from the http_proxy/HTTP_PROXY and
    # ftp_proxy/FTP_PROXY environment variables, when present.
    def setup
      if (proxy = ENV['http_proxy'] || ENV['HTTP_PROXY'])
        proxy = URI.parse(proxy)
        @proxy_host = proxy.host
        @proxy_port = proxy.port
      end
      if (proxy = ENV['ftp_proxy'] || ENV['FTP_PROXY'])
        proxy = URI.parse(proxy)
        @ftpproxy_host = proxy.host
        @ftpproxy_port = proxy.port
      end
    end

    # Fetches +uri+ with GET, following redirects until a final response
    # has been stored in +rsp+.
    #
    # Raises Timeout::Error when the whole exchange exceeds the timeout and
    # RuntimeError for a scheme other than http, https or ftp.
    def get(uri)
      set_uri(uri)
      @req.method = 'GET'
      Timeout.timeout(@timeout) {
        # The scheme handlers leave the loop with `throw :exit` once a
        # final (non-redirect) response has been stored.
        catch(:exit) {
          loop do
            case @uri.scheme
            when "http"
              get_http
            when "https"
              get_https
            when "ftp"
              get_ftp
            else
              # Without this branch the loop would spin until the timeout.
              raise "not supported scheme: #{@uri.scheme.inspect}"
            end
          end
        }
      }
    end

    # Issues a HEAD request for +uri+, following redirects.
    #
    # Raises RuntimeError for ftp ("not supported method") and for any
    # other unsupported scheme; Timeout::Error on timeout.
    def head(uri)
      set_uri(uri)
      @req.method = 'HEAD'
      Timeout.timeout(@timeout) {
        catch(:exit) {
          loop do
            case @uri.scheme
            when "http"
              head_http
            when "https"
              head_https
            when "ftp"
              raise "not supported method"
            else
              raise "not supported scheme: #{@uri.scheme.inspect}"
            end
          end
        }
      }
    end

    # Issues a POST request to +uri+. If the server answers with a
    # redirect, the form data is cleared and the target is fetched with GET.
    #
    # Raises RuntimeError for ftp ("not supported method") and for any
    # other unsupported scheme; Timeout::Error on timeout.
    def post(uri)
      set_uri(uri)
      @req.method = 'POST'
      Timeout.timeout(@timeout) {
        catch(:exit) {
          case @uri.scheme
          when "http"
            post_http
          when "https"
            post_https
          when "ftp"
            raise "not supported method"
          else
            raise "not supported scheme: #{@uri.scheme.inspect}"
          end
          # Only reached when the POST was redirected: follow with GET.
          @req.form.clear
          loop do
            case @uri.scheme
            when "http"
              get_http
            when "https"
              get_https
            when "ftp"
              raise "not supported method"
            else
              raise "not supported scheme: #{@uri.scheme.inspect}"
            end
          end
        }
      }
    end

    # Performs one HTTP GET for the current request. Throws :exit when the
    # response is final; returns normally after a redirect so the caller's
    # loop issues the next request.
    def get_http
      path = request_path(true)
      Net::HTTP.start(@req.server_name, @req.server_port.to_i, @proxy_host, @proxy_port) {|http|
        # Send the merged header hash (cookies joined into one header), as
        # head_http and post_http do.
        response = http.get(path, request_headers)
        @rsp = Web::Response.new
        case response
        when Net::HTTPSuccess
          @rsp.status = response.code.to_s
          copy_headers(response)
          store_cookies
          @rsp.body = response.body
          decode_body
          # Both redirect checks are always evaluated, in this order.
          redirected = follow_refresh_header
          redirected = follow_meta_refresh || redirected
          throw :exit unless redirected
        when Net::HTTPNotModified
          # Must precede HTTPRedirection: 304 is a 3xx without a Location.
          store_final_response(response)
          throw :exit
        when Net::HTTPRedirection
          redirect_uri(response['location'])
        when Net::HTTPUnauthorized
          store_final_response(response)
          @passwordmanager.get(self) if @passwordmanager
          throw :exit
        else
          store_final_response(response)
          throw :exit
        end
      }
    end

    # Performs one HTTP HEAD for the current request. Throws :exit when the
    # response is final; returns normally after a redirect.
    def head_http
      path = request_path(false)
      # Block form so the connection is closed even when an exception or
      # the :exit throw unwinds through here.
      Net::HTTP.start(@req.server_name, @req.server_port.to_i, @proxy_host, @proxy_port) {|http|
        response = http.head(path, request_headers)
        @rsp = Web::Response.new
        # Headers and cookies are recorded for every response, redirects
        # included.
        copy_headers(response)
        store_cookies
        case response
        when Net::HTTPSuccess
          @rsp.status = response.code.to_s
          @rsp.body = response.body
          throw :exit unless follow_refresh_header
        when Net::HTTPRedirection
          redirect_uri(response['location'])
        else
          @rsp.status = response.code.to_s
          @rsp.body = response.body
          throw :exit
        end
      }
    end

    # Performs one HTTP POST with the request body. Throws :exit when the
    # response is final; returns normally after a redirect.
    def post_http
      path = request_path(true)
      Net::HTTP.start(@req.server_name, @req.server_port.to_i, @proxy_host, @proxy_port) {|http|
        response = http.post(path, @req.body, request_headers)
        @rsp = Web::Response.new
        # Headers and cookies are recorded for every response, redirects
        # included.
        copy_headers(response)
        store_cookies
        case response
        when Net::HTTPSuccess
          @rsp.status = response.code.to_s
          @rsp.body = response.body
          decode_body
          redirected = follow_refresh_header
          redirected = follow_meta_refresh || redirected
          throw :exit unless redirected
        when Net::HTTPRedirection
          redirect_uri(response['location'])
        else
          @rsp.status = response.code.to_s
          @rsp.body = response.body
          throw :exit
        end
      }
    end

    # Resolves +uri+ against the current URI and makes the result the
    # target of the next request.
    def redirect_uri(uri)
      @uri = @uri.merge(uri)
      set_uri(@uri.to_s)
    end

    # Parses and normalizes +uri+, then copies its parts into the request
    # (server name/port, path, query and, for http(s), the Host header).
    def set_uri(uri)
      @uri = URI.parse(uri.to_s).normalize
      case @uri.scheme
      when /^http/
        @req.server_name = @uri.host
        @req.server_port = @uri.port
        @req.script_name = @uri.path
        @req.query_string = @uri.query
        @req.query.clear
        unless @uri.query.nil?
          # Split "k=v" pairs separated by "&" or ";"; the value is optional.
          @uri.query.scan(/([^=&;]+)(?:=([^&;]*))?[&;]?/n) {|key,value|
            key = Web::unescape(key)
            value = Web::unescape(value) if value
            @req.query.add key,value
          }
        end
        host = @uri.host
        host = host + ":" + @uri.port.to_s if @uri.port != 80
        @req.header['Host'] = host
      when /^ftp/
        @req.server_name = @uri.host
        @req.server_port = @uri.port
        @req.script_name = @uri.path
      end
    end

    # Downloads the current URI over FTP in passive mode into +rsp.body+
    # and throws :exit. Connection, login and retrieval failures are
    # reported through +rsp.status+.
    #
    # Raises RuntimeError when an FTP proxy is configured.
    def get_ftp
      @rsp = Web::Response.new
      @rsp.body = ''
      raise "not supported ftp_proxy" unless @ftpproxy_host.nil?

      ftp = Net::FTP.new
      begin
        ftp.connect(@uri.host)
      rescue
        @rsp.status = Web::Response::STATUS_ServiceUnavailable
        throw :exit
      end
      ftp.login(@uri.user || 'anonymous', @uri.password)
      if ftp.lastresp=~/\A530/
        @rsp.status = Web::Response::STATUS_Unauthorized
        ftp.close
        throw :exit
      end
      ftp.passive = true
      begin
        ftp.retrbinary("RETR #{@uri.path}", 1024) {|data|
          @rsp.body << data
        }
        ftp.close
        throw :exit
      rescue Net::FTPPermError
        @rsp.status = Web::Response::STATUS_NotFound
        ftp.close
        throw :exit
      end
    end

    private

    # Returns the request path including the encoded query string.
    # Form data is encoded as well when +encode_form+ is true.
    def request_path(encode_form)
      path = @req.script_name
      @req.query_encode
      @req.form_encode if encode_form
      if @req.query_string && !@req.query_string.empty?
        path = path + "?" + @req.query_string
      end
      path
    end

    # Lets the password and cookie managers update the request, then
    # returns the header hash with all cookie values joined into a single
    # "cookie" entry.
    def request_headers
      @passwordmanager.get(self) if @passwordmanager
      @cookiemanager.get(@req) if @cookiemanager
      headers = @req.header.to_hash
      if (values = @req.header['cookie',nil])
        headers.delete 'cookie'
        headers['cookie'] = values.join('; ')
      end
      headers
    end

    # Copies every header of +response+ into the current Web::Response.
    def copy_headers(response)
      response.each {|key,value|
        @rsp.header.add key,value
      }
    end

    # Records status, headers and body of +response+ as they are.
    def store_final_response(response)
      @rsp.status = response.code.to_s
      copy_headers(response)
      @rsp.body = response.body
    end

    # Parses Set-Cookie headers of the current response and hands them to
    # the cookie manager, if one is configured.
    def store_cookies
      set_cookies = @rsp.header['set-cookie',nil]
      return unless set_cookies
      set_cookies.each {|set_cookie|
        @rsp.cookies.parse(set_cookie, @req)
      }
      @cookiemanager.set(@req, @rsp) if @cookiemanager
    end

    # Decompresses the body according to Content-Encoding (gzip/deflate).
    # A missing or empty body is left untouched.
    def decode_body
      body = @rsp.body
      return if body.nil? || body.empty?
      case @rsp.header['content-encoding']
      when /gzip/i
        @rsp.body = Zlib::GzipReader.new(StringIO.new(body)).read
      when /deflate/i
        @rsp.body = Zlib::Inflate.inflate(body)
      end
    end

    # Follows a 'Refresh: N;URL=...' response header (single-digit delay).
    # Returns true when a redirect target was set.
    def follow_refresh_header
      refresh = @rsp.header['Refresh']
      return false unless refresh && refresh =~ /\A\d;URL="?([^"]+)"?/
      redirect_uri($1)
      true
    end

    # Follows an HTML <meta http-equiv="refresh"> redirect in a text/html
    # body. Returns true when a redirect target was set.
    #
    # NOTE(review): both patterns below are reconstructions; the original
    # literals were lost to markup stripping. Confirm against upstream.
    def follow_meta_refresh
      return false unless @rsp.header['content-type'] =~ /text\/html/
      # Work on a copy so the stored body is not altered; comments are
      # dropped so a commented-out meta tag is not followed.
      html = @rsp.body.to_s.gsub(/<!--.*?-->/m, '')
      pattern = /<meta[^>]*http-equiv=["']?refresh["']?[^>]*content=["']?\d+;\s*URL=["']?([^"'>]+)/i
      return false unless html =~ pattern
      redirect_uri($1)
      true
    end
  end # Agent
end # Web