Skip to content

Commit

Permalink
update google scraper, now with capybara / ajax support - also feelin…
Browse files Browse the repository at this point in the history
…g out an initial UI
  • Loading branch information
jcran committed Feb 13, 2012
1 parent 4245ea0 commit 43323c5
Show file tree
Hide file tree
Showing 25 changed files with 274 additions and 30 deletions.
15 changes: 13 additions & 2 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,36 @@ gem 'therubyracer'
gem 'fastercsv'
gem 'rex'
gem 'nmap-parser'
gem 'nokogiri'
gem 'json'

# Data Formats
gem 'exifr'

# Network Services:
#gem 'pcaprub'
gem 'dnsruby'
gem 'geoip'
gem 'whois'
#gem 'pcaprub'
gem 'packetfu'

# Web Services
gem 'linkedin'
gem 'flickr'

# Scraping
gem 'nokogiri'

# Heavy-duty scraping
gem 'selenium-webdriver'
gem 'capybara'
# https://github.com/thoughtbot/capybara-webkit#readme
# apt-get install libqt4-dev libqtwebkit-dev
gem 'capybara-webkit' # Requires QTwebkit
# gem 'headless'





# Gems used only for assets and not required
# in production environments by default.
Expand Down
16 changes: 14 additions & 2 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@ GEM
ansi (1.4.1)
arel (2.2.1)
builder (3.0.0)
capybara (1.1.2)
mime-types (>= 1.16)
nokogiri (>= 1.3.3)
rack (>= 1.0.0)
rack-test (>= 0.5.4)
selenium-webdriver (~> 2.0)
xpath (~> 0.1.4)
capybara-webkit (0.9.0)
capybara (>= 1.0.0, < 1.2)
json
childprocess (0.3.0)
ffi (~> 1.0.6)
coderay (0.9.8)
Expand Down Expand Up @@ -76,7 +86,6 @@ GEM
nokogiri (1.5.0)
oauth (0.4.5)
packetfu (1.1.5)
pcaprub (0.11.2)
pg (0.12.2)
polyglot (0.3.3)
pry (0.9.7.4)
Expand Down Expand Up @@ -148,11 +157,15 @@ GEM
multi_json (>= 1.0.2)
whois (2.1.0)
xml-simple (1.1.1)
xpath (0.1.4)
nokogiri (~> 1.3)

PLATFORMS
ruby

DEPENDENCIES
capybara
capybara-webkit
coffee-rails (~> 3.1.1)
dnsruby
exifr
Expand All @@ -165,7 +178,6 @@ DEPENDENCIES
nmap-parser
nokogiri
packetfu
pcaprub
pg
pry
rails (= 3.1.2)
Expand Down
83 changes: 83 additions & 0 deletions app/controllers/tasks_controller.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# RESTful controller for Task resources. Every action answers both HTML
# and JSON requests via respond_to.
class TasksController < ApplicationController
  # GET /tasks
  # GET /tasks.json
  #
  # Lists every task.
  def index
    @tasks = Task.all

    respond_to do |format|
      format.html # index.html.erb
      format.json { render json: @tasks }
    end
  end

  # GET /tasks/1
  # GET /tasks/1.json
  #
  # Shows the task identified by params[:id].
  def show
    @task = Task.find(params[:id])

    respond_to do |format|
      format.html # show.html.erb
      format.json { render json: @task }
    end
  end

  # GET /tasks/new
  # GET /tasks/new.json
  #
  # Builds an unsaved task for the "new" form.
  def new
    @task = Task.new

    respond_to do |format|
      format.html # new.html.erb
      format.json { render json: @task }
    end
  end

  # GET /tasks/1/edit
  #
  # Loads the task identified by params[:id] for the "edit" form.
  def edit
    @task = Task.find(params[:id])
  end

  # POST /tasks
  # POST /tasks.json
  #
  # Creates a task from the submitted attributes. On failure the "new" form
  # is re-rendered (HTML) or the validation errors are returned (JSON).
  def create
    # Request parameters are keyed by the lowercase model name (:task);
    # the capitalised :Task key is never submitted by form_for(@task).
    @task = Task.new(params[:task])

    respond_to do |format|
      if @task.save
        format.html { redirect_to @task, notice: 'Task was successfully created.' }
        format.json { render json: @task, status: :created, location: @task }
      else
        format.html { render action: "new" }
        format.json { render json: @task.errors, status: :unprocessable_entity }
      end
    end
  end

  # PUT /tasks/1
  # PUT /tasks/1.json
  #
  # Updates the task identified by params[:id]. On failure the "edit" form
  # is re-rendered (HTML) or the validation errors are returned (JSON).
  def update
    @task = Task.find(params[:id])

    respond_to do |format|
      # Same lowercase :task key as in #create.
      if @task.update_attributes(params[:task])
        format.html { redirect_to @task, notice: 'Task was successfully updated.' }
        format.json { head :ok }
      else
        format.html { render action: "edit" }
        format.json { render json: @task.errors, status: :unprocessable_entity }
      end
    end
  end

  # DELETE /tasks/1
  # DELETE /tasks/1.json
  #
  # Destroys the task identified by params[:id] and returns to the task list.
  def destroy
    @task = Task.find(params[:id])
    @task.destroy

    respond_to do |format|
      # `resources :tasks` generates the lowercase helper tasks_url;
      # Tasks_url is not a route helper.
      format.html { redirect_to tasks_url }
      format.json { head :ok }
    end
  end
end
26 changes: 26 additions & 0 deletions app/views/domains/show.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,32 @@
<%= @domain.organization_id %>
</p>

<%# Related records. Links are resolved with polymorphic_path, which derives the
    route helper from the record's model name (e.g. NetBlock -> net_block_path).
    Building the helper name with eval and class.to_s.downcase produced
    "netblock_path", which does not match `resources :net_blocks`. %>
<p>
  <b>Parents:</b>
  <ul>
    <% @domain.parents.each do |parent| %>
      <li><%= link_to parent, polymorphic_path(parent) %></li>
    <% end %>
  </ul>
</p>

<p>
  <b>Children:</b>
  <ul>
    <% @domain.children.each do |child| %>
      <li><%= link_to child, polymorphic_path(child) %></li>
    <% end %>
  </ul>
</p>

<p>
  <b>Tasks:</b>
  <ul>
    <%# Tasks are addressed by name rather than by id.
        NOTE(review): assumes every entry routes through `resources :tasks` - confirm. %>
    <% @domain.tasks.each do |task| %>
      <li><%= link_to task, task_path(task.name) %></li>
    <% end %>
  </ul>
</p>

<%= link_to 'Edit', edit_domain_path(@domain) %> |
<%= link_to 'Back', domains_path %>
11 changes: 0 additions & 11 deletions app/views/hosts/show.html.erb
Original file line number Diff line number Diff line change
@@ -1,20 +1,9 @@
<p id="notice"><%= notice %></p>

<p>
<b>Name:</b>
<%= @host.name %>
</p>

<p>
<b>Ip address:</b>
<%= @host.ip_address %>
</p>

<p>
<b>Organization:</b>
<%= @host.organization_id %>
</p>


<%= link_to 'Edit', edit_host_path(@host) %> |
<%= link_to 'Back', hosts_path %>
10 changes: 10 additions & 0 deletions app/views/maps/index.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<h1>Listing maps</h1>
<table>
<tr>
<th>Type</th>
</tr>
<tr>
<td>Google Map</td>
</tr>
</table>
<br />
16 changes: 16 additions & 0 deletions app/views/tasks/_form.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<%# Shared form partial rendered by the task "new" and "edit" views. It shows
    any validation errors on @task and a submit button; no input fields are
    defined here. %>
<%= form_for(@task) do |f| %>
<%# Error summary: only rendered when @task carries validation errors. %>
<% if @task.errors.any? %>
<div id="error_explanation">
<h2><%= pluralize(@task.errors.count, "error") %> prohibited this task from being saved:</h2>

<ul>
<% @task.errors.full_messages.each do |msg| %>
<li><%= msg %></li>
<% end %>
</ul>
</div>
<% end %>
<div class="actions">
<%= f.submit %>
</div>
<% end %>
6 changes: 6 additions & 0 deletions app/views/tasks/edit.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<h1>Editing task</h1>

<%= render 'form' %>

<%# TasksController#edit assigns only @task, so both links must target the task
    resource rather than the domain views this template was copied from. %>
<%= link_to 'Show', @task %> |
<%= link_to 'Back', tasks_path %>
19 changes: 19 additions & 0 deletions app/views/tasks/index.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<h1>Listing tasks</h1>

<table>
  <tr>
    <th>Name</th>
    <th>Accepts</th>
  </tr>

  <% @tasks.each do |task| %>
    <tr>
      <td><%= task.name %></td>
      <%# Show the accepted type names as a readable comma-separated list
          instead of printing a raw Ruby array. %>
      <td><%= task.allowed_types.map(&:name).join(', ') %></td>
    </tr>
  <% end %>
</table>

<br />

<%= link_to 'New task', new_task_path %>
5 changes: 5 additions & 0 deletions app/views/tasks/new.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<%# "New task" page: renders the shared form partial and a link back to the task list. %>
<h1>New task</h1>

<%= render 'form' %>

<%= link_to 'Back', tasks_path %>
9 changes: 9 additions & 0 deletions app/views/tasks/show.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<%# Task detail page: shows the flash notice and the task's name, with links to
    edit the task or return to the task list. %>
<p id="notice"><%= notice %></p>

<p>
<b>Name:</b>
<%= @task.name %>
</p>

<%= link_to 'Edit', edit_task_path(@task) %> |
<%= link_to 'Back', tasks_path %>
1 change: 1 addition & 0 deletions config/routes.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
Ear::Application.routes.draw do
resources :tasks
resources :net_blocks
resources :findings
resources :task_results
Expand Down
37 changes: 36 additions & 1 deletion lib/client/google.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,46 @@
require 'open-uri'
require 'cgi'
require "capybara"
require "capybara/dsl"
require "capybara-webkit"

module Ear
module Client
module Google

############################
# DEPENDENCIES:
#
# Install the capybara gem:
# $ gem install capybara
#
# Then, follow instructions from https://github.com/thoughtbot/capybara-webkit#readme
# and install the capybara-webkit gem and drivers:
# $ sudo apt-get install libqt4-dev libqtwebkit-dev
# $ gem install capybara-webkit
############################

# Scrapes Google web-search result links by driving a real browser session
# through Capybara with the capybara-webkit (:webkit) driver.
class SearchScraper
  include Capybara::DSL

  # Points Capybara at the remote Google site.
  #
  # These are Capybara module-level settings, so they apply to every
  # Capybara session in the process, not just this instance.
  def initialize
    Capybara.run_server = false       # remote site: do not boot a local Rack server
    Capybara.default_selector = :xpath
    Capybara.current_driver = :webkit
    # No trailing slash: Capybara prepends app_host to the visited path, so
    # "http://www.google.com/" + "/" would request "http://www.google.com//".
    Capybara.app_host = "http://www.google.com"
  end

  # Runs a Google search through the browser and collects the result links.
  #
  # term - the query text typed into the search box ("q" field).
  #
  # Returns an Array with the href of every result anchor currently on the
  # page (empty when no element matches the result XPath).
  def search(term)
    visit('/')
    fill_in "q", :with => term
    click_button "Google Search"
    all("//li[@class='g']/h3/a").map { |link| link[:href] }
  end
end

# This class represents the google AJAX API
#
# Reference:
Expand Down Expand Up @@ -80,7 +116,6 @@ def to_s
"#{@gsearch_result_class} #{@title} #{@url} #{@content}"
end


end

end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
require '../../../config/environment'
require 'test/unit'

class TestBing < Test::Unit::TestCase
class TestBingSearchService < Test::Unit::TestCase

def test_bing_search_acme
x = Ear::Client::Bing::SearchService.new
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#require '../corpwatch'
require 'test/unit'

class TestCorpwatch < Test::Unit::TestCase
class TestCorpwatchService < Test::Unit::TestCase

def test_corpwatch_search_acme
x = Ear::Client::Corpwatch::CorpwatchService.new
Expand Down
14 changes: 14 additions & 0 deletions lib/client/test/test_google_search_scraper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
$:.unshift(File.dirname(__FILE__))

require '../../../config/environment'
require 'test/unit'

# Exercises Ear::Client::Google::SearchScraper end to end.
class TestGoogleSearchScraper < Test::Unit::TestCase
  # Searches for "test" and expects exactly ten result URIs back.
  def test_google_search_scraper_test
    results = Ear::Client::Google::SearchScraper.new.search("test")
    assert(results.count == 10, "Wrong count, should be 10, is #{results.count}")
  end
end
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
require '../../../config/environment'
require 'test/unit'

class TestGoogle < Test::Unit::TestCase
class TestGoogleSearchService < Test::Unit::TestCase

def test_google_search_acme
x = Ear::Client::Google::SearchService.new
Expand Down
Loading

0 comments on commit 43323c5

Please sign in to comment.