forked from jhubert/website-scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
website-scraper.rb
48 lines (43 loc) · 1.29 KB
/
website-scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# You'll need to require these if you
# want to develop while running with ruby.
# The config/rackup.ru requires these as well
# for its own reasons.
#
# $ ruby heroku-sinatra-app.rb
#
require 'rubygems'
require 'sinatra'
require 'nokogiri'
require 'net/http'
require 'open-uri'
# Hook for settings that should apply only when the app runs in the
# production environment. The block is currently empty, so it changes nothing.
configure :production do
end
# Helpers used by the scraping route below.
helpers do
  # Parses a user-supplied address.
  #
  # Spaces are turned into '+' (as the original implementation did) so that
  # loosely encoded query strings still parse.
  #
  # @param raw [String] the address exactly as received in the request
  # @return [URI::HTTP, URI::HTTPS, nil] the parsed address, or nil when it is
  #   malformed or is not an absolute http(s) URL with a host
  def parse_target_url(raw)
    parsed = URI.parse(raw.strip.tr(' ', '+'))
    parsed if parsed.is_a?(URI::HTTP) && !parsed.host.to_s.empty?
  rescue URI::InvalidURIError
    nil
  end

  # Downloads a page and returns its body. Redirects are not followed.
  #
  # @param url [URI::HTTP, URI::HTTPS] the page to download
  # @return [String] the response body ('' when the response carries none)
  def fetch_page_body(url)
    http = Net::HTTP.new(url.host, url.port)
    http.use_ssl = url.is_a?(URI::HTTPS)
    headers = {
      'User-Agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.1) Gecko/20060111 Firefox/1.5.0.1'
    }
    # Net::HTTP#get returns only the response object on Ruby 1.9+, so the body
    # has to be read from it rather than destructured from the return value.
    # request_uri yields "/" for an empty path and appends the query string.
    http.get(url.request_uri, headers).body.to_s
  end

  # Renders text as a single-quoted JavaScript string literal.
  #
  # The hash form of gsub is used on purpose: with a replacement *string*,
  # "\\'" is interpreted as the post-match back-reference instead of an
  # escaped quote.
  #
  # @param text [String] arbitrary text
  # @return [String] the text wrapped in single quotes with backslashes,
  #   single quotes, newlines and carriage returns escaped
  def js_string_literal(text)
    escapes = { '\\' => '\\\\', "'" => "\\'", "\n" => '\\n', "\r" => '\\r' }
    "'#{text.gsub(/[\\'\n\r]/, escapes)}'"
  end
end

# Scraping endpoint.
#
# Query parameters:
#   url      - absolute http(s) address of the page to fetch. When missing or
#              blank, the HTML index view is rendered instead.
#   selector - optional CSS selector. When missing, the raw page body is
#              returned as plain text.
#   callback - optional JSONP callback name, used only together with selector.
#              The selected markup is then returned as callback('<markup>')
#              with a JavaScript content type.
#
# Responds with 400 for a bad url, selector or callback and with 502 when the
# remote page cannot be fetched.
#
# NOTE(review): this endpoint fetches any address the caller supplies, so it
# can be used to reach hosts that are only visible from the server
# (server-side request forgery). Consider restricting the allowed hosts.
get '/' do
  uri = params[:url]
  selector = params[:selector]
  callback = params[:callback]

  if uri.nil? || uri.strip.empty?
    content_type :html
    return haml(:index)
  end

  content_type :text

  url = parse_target_url(uri)
  halt 400, 'url must be an absolute http:// or https:// address' if url.nil?

  # The callback name is emitted verbatim into a JavaScript response, so only
  # plain (optionally dotted) identifiers are accepted.
  if selector && callback && callback !~ /\A[A-Za-z_$][\w$]*(?:\.[A-Za-z_$][\w$]*)*\z/
    halt 400, 'callback must be a JavaScript identifier'
  end

  begin
    body = fetch_page_body(url)
  rescue SocketError, SystemCallError, Timeout::Error, IOError => e
    halt 502, "could not fetch #{url}: #{e.message}"
  end
  return body if selector.nil?

  begin
    output = Nokogiri::HTML(body).css(selector).to_s
  rescue Nokogiri::SyntaxError
    halt 400, 'selector is not a valid CSS selector'
  end
  return output if callback.nil?

  content_type :js
  "#{callback}(#{js_string_literal(output)})"
end
# Debugging aid: shows the process environment (ENV) so you can inspect the
# app-specific configuration while developing.
#
# IMPORTANT! ENV usually contains credentials and other sensitive
# information, so this route answers 404 when the app runs in the production
# environment. Remove the guard only if you are certain nothing sensitive is
# stored there.
get '/env' do
  halt 404 if settings.production?
  ENV.inspect
end