#
# Copyright (C) 2007 Mobio Networks, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

=begin
Provides advertisement fetching for Google's Mobile AdSense (the alpha level
keyword version and the released "page scraping" version).
=end

require 'cgi'
require 'digest/sha1'
require 'open-uri'
require 'rexml/document'
require 'uri'

begin
  # Kept so that code relying on this file to load SequencedHash still gets it.
  # This module itself no longer needs it (query arguments are held in an
  # ordered array of pairs), so a missing library must not prevent loading.
  require 'collections/sequenced_hash'
rescue LoadError
  nil
end

module Rmobio
  module Ads
    # Mixin that fetches ads from Google's mobile AdSense service.
    #
    # getAdByPage relies on the including object providing a +request+ method
    # (remote_ip, request_uri, user_agent, env, host); getAd only needs it for
    # its default argument values.
    module AdSense
      @@adServer = "http://pagead2.googlesyndication.com/pagead/ads"
      @@mobio_useragent = "Mobio" # we should probably come up with a better string
      @@default_ip = "127.0.0.1"

      # Convenience wrapper around getAd that pulls ip, url, user agent and
      # user id out of the supplied request object.
      # * keywords - keywords to search for
      # * ad_client - your Google AdSense ID
      # * request - object answering remote_ip, request_uri, user_agent and env
      # * kw_type - keyword match type, defaults to "broad", can be "exact"
      #
      # Returns whatever getAd returns (the full ad markup).
      def get_ad(keywords, ad_client, request, kw_type = "broad")
        getAd(keywords, ad_client, request.remote_ip, request.request_uri,
              request.user_agent, request.env["HTTP_USERID"], kw_type)
      end

      # Returns an ad from Google's mobile AdSense, given the current page.
      # * adClient - your Google AdSense ID
      # * ip - optional, the IP you want to tell Google it is coming from
      # * url - optional, the page you want to put an ad on. Defaults to what is detected
      # * useragent - the useragent of the mobile browser. Defaults to what is detected
      # * userid - optional, user id; only a truncated SHA1 digest of it is sent
      #
      # The response body is stored in @pagead and returned.
      def getAdByPage(adClient, ip = request.remote_ip, url = request.request_uri,
                      useragent = request.user_agent, userid = request.env["HTTP_USERID"])
        # Retained from the original implementation in case other code reads it.
        # NOTE(review): a class variable is shared by every includer - confirm it is still wanted.
        @@userId = request.env['HTTP_USERID']

        ad_args = [
          ["ad_type", "text"],
          ["channel", "8618723264"],
          ["client", adClient],
          ["format", "mobile_single"],
          ["host", request.host],
          ["ip", ip],
          ["markup", "xhtml"],
          ["output", "xhtml"],
          ["oe", "utf8"],
          ["url", url],
          ["useragent", useragent],
          ["eip", adsense_hashed_user_id(userid)]
        ]

        # ok, now call google's mobile adSense service and get back the full ad
        @pagead = adsense_fetch(adsense_query_url(ad_args), useragent)
      end

      # Retrieves an ad from the network for a supplied set of keywords.
      # * keywords - keywords to search for (when nil, no keyword arguments are sent)
      # * ad_client - the publisher ID for the network (Google AdSense publisher ID in this case)
      # * ip (optional, defaults to accessing IP of client) - accessing IP
      # * url (optional, defaults to URL being accessed from the request header) - URL that the ad is for
      # * useragent (optional, defaults to useragent from the request header) - user agent string for the browser
      # * userid (optional) - user id; only a truncated SHA1 digest of it is sent
      # * kw_type - keyword match type, defaults to "broad", can be set to "exact"
      #
      # Returns the full ad markup (also stored in @ad). When the markup
      # contains a <p> element, @adurl, @urltext, @extratext and, if present,
      # @phone and @phonetext are filled in from its children.
      # Markup that REXML cannot parse raises the REXML error unchanged.
      def getAd(keywords, ad_client, ip = request.remote_ip, url = request.request_uri,
                useragent = request.user_agent, userid = request.env["HTTP_USERID"],
                kw_type = "broad")
        shorten_links = true

        # build up the various arguments, in the order they are sent
        ad_args = [
          ["ad_type", "text_image"],
          ["client", ad_client],
          ["format", "mobile_single"],
          ["ip", ip],
          ["markup", "xhtml"],
          ["output", "xhtml"],
          ["oe", "utf-8"],
          ["url", url],
          ["useragent", useragent],
          ["eip", adsense_hashed_user_id(userid)]
        ]
        if keywords
          ad_args << ["kw", keywords]
          ad_args << ["kw_type", kw_type]
        end

        ad_url = adsense_query_url(ad_args)
        # paste in your browser from log to verify/test
        $log.info("Ad URL: " + ad_url) if $log

        # The User-Agent header is sent as-is: HTTP header values are not
        # URL-encoded, and this matches what getAdByPage sends.
        @ad = adsense_fetch(ad_url, useragent)
        adsense_parse_ad(@ad, shorten_links) if @ad
        @ad # return the full ad text
      end

      private

      # Builds the AdSense request URL from ordered [name, value] pairs.
      # Pairs whose value is nil/false are skipped; every value is CGI-escaped
      # so that characters such as "&" or "?" cannot break the query string.
      def adsense_query_url(ad_args)
        query = ad_args.select { |_name, value| value }
                       .map { |name, value| "#{name}=#{CGI.escape(value.to_s)}" }
                       .join("&")
        @@adServer + "?" + query
      end

      # One-way hash of the user id (first 21 hex characters of its SHA1), or nil.
      def adsense_hashed_user_id(userid)
        Digest::SHA1.hexdigest(userid.to_s)[0..20] if userid
      end

      # Performs an HTTP GET and returns the response body.
      # A User-Agent header is sent only when +useragent+ is given. The block
      # form closes the underlying IO; URI.open is used where it exists because
      # Kernel#open no longer fetches URLs on Ruby 3.
      def adsense_fetch(url, useragent = nil)
        headers = {}
        headers["User-Agent"] = useragent if useragent
        if URI.respond_to?(:open)
          URI.open(url, headers) { |io| io.read }
        else
          open(url, headers) { |io| io.read }
        end
      end

      # href attribute of +node+, or nil when the node has no attributes
      # (for example a text node, or no node at all).
      def adsense_href(node)
        node.attributes['href'] if node.respond_to?(:attributes)
      end

      # Extracts link and phone details from the first <p> element of the ad
      # markup into @adurl, @urltext, @extratext, @phone and @phonetext.
      # Does nothing when there is no <p> element.
      def adsense_parse_ad(markup, shorten_links)
        paragraph = REXML::Document.new(markup).elements['//p']
        return unless paragraph

        # child 1 carries the ad link, child 2 the remaining text,
        # child 3 (when present) the click-to-call link
        link = paragraph[1]
        href = adsense_href(link)
        @adurl = if href
                   shorten_links ? tinyUrl(href) : href
                 else
                   "nolink.rwap"
                 end
        @urltext = link.respond_to?(:text) ? link.text : nil

        extra = paragraph[2]
        @extratext = extra.to_s if extra

        phone_link = paragraph[3]
        return unless phone_link # only process phone stuff if it exists

        phone_href = adsense_href(phone_link)
        @phone = (shorten_links ? tinyUrl(phone_href) : phone_href) if phone_href
        phone_text = phone_link.respond_to?(:text) ? phone_link.text : nil
        @phonetext = phone_text if phone_text
      end

      # need to use tinyURL since the URLs that come back from AdSense are just too long for our client
      #
      # Returns the shortened link scraped from TinyURL's response, or nil when
      # the expected markers are not found in it.
      def tinyUrl(url)
        # The target is escaped so that its own "?" and "&" stay inside the url parameter.
        shrinker = "http://tinyurl.com/create.php?url=" + CGI.escape(url.to_s)
        result = adsense_fetch(shrinker)

        # ITS NOT WELLFORMED HTML SO WE CAN'T USE XML TO PARSE
        # find the second <blockquote> and extract the contents of it
        # NOTE(review): these two marker literals were lost from the source text
        # (markup was stripped); they are reconstructed from the XPath of the
        # earlier XML-based attempt ('//blockquote' then './b') - confirm
        # against TinyURL's actual response.
        open_marker = '<blockquote><b>'
        close_marker = '</b>'

        first_block = result.index(open_marker)
        return nil unless first_block

        second_block = result.index(open_marker, first_block + 1)
        return nil unless second_block

        link_start = second_block + open_marker.size
        link_end = result.index(close_marker, link_start)
        return nil unless link_end

        result[link_start...link_end]
      end
    end
  end
end