#### Source code for inandout.py ####
# Identification string and version number for this script. Neither constant
# is referenced by the code visible in this file.
NAME = 'InAndOut/Technorati/Python'
VERSION = '0.01'
# Copyright (C) 2004 Chris Fry
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import getopt
import sys
from pprint import pprint
from xml.sax.saxutils import escape

import technorati
def sort_by_value_and_links(d, weblogs):
    """Rank the keys of ``d`` by value, breaking ties by inbound links.

    Args:
        d: Mapping of key (a weblog URL in this script) to a numeric count.
        weblogs: Mapping of the same keys to records that provide
            ``record['weblog']['inboundlinks']`` as a number.

    Returns:
        A new list of ``[count, key, inboundlinks]`` lists ordered by count
        descending, then by inbound links descending.  Entries that tie on
        both fields keep the iteration order of ``d``.

    Raises:
        KeyError: If a key of ``d`` has no matching record in ``weblogs``.
    """
    ranked = [
        [count, key, weblogs[key]['weblog']['inboundlinks']]
        for key, count in d.items()
    ]
    # A tuple key gives a true two-level ordering.  The earlier comparator
    # folded both fields into one number (count * 10000000 + links), which
    # mis-ranked entries once link totals differed by ten million or more,
    # and comparator-style sort() does not exist on Python 3.
    ranked.sort(key=lambda entry: (entry[0], entry[2]), reverse=True)
    return ranked
def perform_query(url):
    """Collect the weblogs linking to ``url`` and the weblogs those link to.

    Performs a cosmos query for ``url`` and then, for each distinct inbound
    weblog, an outbound-blogs query.

    Args:
        url: The URL handed to ``technorati.getCosmos``.

    Returns:
        A dict with three entries:

        * ``'inbound'`` maps each inbound weblog URL to its weblog record.
        * ``'weblogs'`` maps each outbound weblog URL to the most recently
          seen outbound entry for it.
        * ``'counts'`` maps each outbound weblog URL to the number of
          outbound entries that referenced it across the distinct inbound
          weblogs.
    """
    # NOTE(review): presumably locates/loads the API key -- confirm against
    # the technorati module.
    technorati.findkey()
    # NOTE(review): the second argument is presumably a result limit -- confirm.
    data = technorati.getCosmos(url, 100)

    inbound = {}
    weblogs = {}
    counts = {}
    for entry in data['inbound']:
        in_url = entry['weblog']['url']
        if in_url in inbound:
            # Already visited: skip so its outbound links are not fetched
            # and counted a second time.
            # NOTE(review): the original indentation was lost; this assumes
            # the outbound lookup was nested under the "not yet visited"
            # check -- confirm against the original file.
            continue
        inbound[in_url] = entry['weblog']

        for out_entry in technorati.getOutboundBlogs(in_url)['outbound']:
            out_url = out_entry['weblog']['url']
            if out_url == in_url:
                # Ignore the case where the blog refers to itself.
                continue
            weblogs[out_url] = out_entry
            counts[out_url] = counts.get(out_url, 0) + 1

    return {'inbound': inbound, 'weblogs': weblogs, 'counts': counts}
def format_weblog(weblog, count=None):
    """Render one weblog record as an HTML table row.

    All values are HTML-escaped before being placed in the row, so names or
    URLs containing ``&``, ``<``, ``>`` or quotes cannot break the markup.

    Args:
        weblog: Mapping with ``'url'``, ``'name'``, ``'lastupdate'``,
            ``'inboundblogs'`` and ``'inboundlinks'`` entries.  ``'url'`` and
            ``'name'`` must be strings; the others are passed through
            ``str()``.
        count: Optional value for a trailing extra cell.  The cell is omitted
            only when this is ``None``, so a count of ``0`` is still shown.

    Returns:
        The row as a string that starts with a tab and ends with a newline.
    """
    # The href sits inside single quotes, so quotes must be escaped there too.
    attr_entities = {"'": "&#39;", '"': "&quot;"}
    parts = [
        "\t<tr><td><a href='", escape(weblog['url'], attr_entities), "'>",
        escape(weblog['name']), "</a></td> ",
        "<td>", escape(str(weblog['lastupdate'])), "</td> ",
        "<td>", escape(str(weblog['inboundblogs'])), "</td>",
        "<td>", escape(str(weblog['inboundlinks'])), "</td>",
    ]
    if count is not None:
        parts.extend(["<td>", escape(str(count)), "</td>"])
    parts.append("</tr>\n")
    return "".join(parts)
def make_header(outfile, title):
    """Write the HTML ``<head>`` section to ``outfile``.

    Args:
        outfile: Object with a ``write(str)`` method.
        title: Text appended to the page title.  It is HTML-escaped before
            being written.
    """
    outfile.write("<head><meta http-equiv='Content-Type' content='text/html; charset=iso-8859-1' />")
    outfile.write("<title>Output of the In/Out Query for ")
    # Callers in this file pass the queried URL here; a query string with
    # '&' (or any '<') would otherwise produce malformed markup.
    outfile.write(escape(title))
    outfile.write("</title>\n")
    outfile.write("<link rel='stylesheet' type='text/css' href='technorati.css'/>")
    outfile.write("</head>\n")
def make_body(outfile, title, counts, inbound, weblogs):
    """Write the opening ``<body>`` tag and both result tables to ``outfile``.

    Args:
        outfile: Object with a ``write(str)`` method.
        title: The queried URL.  It is shown in the search box, used as a
            link target and as the link text; it is HTML-escaped for each
            of those contexts.
        counts: Iterable of sequences whose item ``[0]`` is the value for the
            "Number of People" column and whose item ``[1]`` is a key into
            ``weblogs``.  Rows are written in iteration order.
        inbound: Mapping whose values are weblog records accepted by
            ``format_weblog``.
        weblogs: Mapping from key to a record holding the weblog under
            ``['weblog']``.
    """
    write = outfile.write
    # Attribute values are single-quoted in the markup, so quotes need
    # escaping there in addition to &, < and >.
    attr_title = escape(title, {"'": "&#39;", '"': "&quot;"})
    text_title = escape(title)

    # NOTE(review): <div id='b'> and <div id='cosmbox'> are opened below but
    # never closed in this function -- confirm where they are meant to end.
    write("<body id='technorati'>")
    write("<div id='b'><h1 id='l'><a href='/'><img src='http://www.technorati.com/images/logo200407.gif' alt='Technorati' /></a></h1>")
    write("<div id='m'><a href='http://www.technorati.com/help/using-technorati.html' title='Technorati: Help'>Help</a></div>")
    write("<form id='searchform' method='get' action='http://www.technorati.com/cosmos/search.html'><div>")
    write("<input type='hidden' name='rank' value='' />")
    write("<input id='st' type='text' name='url' size='35' maxlength='127' value='" + attr_title + "' />")
    write("<input type='submit' class='submit' value='Search' title='Technorati: Search Results' />")
    write("<label for='st'>Keyword or URL</label>")
    write("</div>")
    write("</form>")
    write("<div id='cosmbox' class='bodytext'>")
    write("<h2> InOut Search for <a href='" + attr_title + "'>" + text_title + "</a></h2>")

    # First table: the weblogs that link to the queried URL.
    write("<hr style=\"color: 'blue'; height: '15'; text-align: 'left'; width: '50%'\">")
    write("<h3>Inbound Bloggers</h3>\n")
    write("<table>\n")
    write("\t<tr><th><h4>Blog Name/URL</h4></th><th><h4>Last Update</h4></th><th><h4>Inbound Blogs</h4></th><th><h4>Inbound Links</h4></th></tr>")
    for record in inbound.values():
        write(format_weblog(record))
    write("</table>\n")

    # Second table: what those weblogs link to, in the order given by counts.
    write("<hr style=\"color: 'blue'; height: '15'; text-align: 'left'; width: '50%'\">")
    write("<h3>Inbound Bloggers Other Connections</h3>\n")
    write("<p>Sorted by number of people who link to you and the blog and number of inbound links from the world</p>")
    write("<table>\n")
    write("\t<tr><th><h4>Blog Name/URL</h4></th><th><h4>Last Update</h4></th><th><h4>Inbound Blogs</h4></th><th><h4>Inbound Links</h4></th><th><h4>Number of People</h4></th></tr>")
    for ranked in counts:
        write(format_weblog(weblogs[ranked[1]]['weblog'], ranked[0]))
    write("</table>\n")
def make_footer(outfile):
    """Write the static page footer and the closing ``</body>`` tag.

    Args:
        outfile: Object with a ``write(str)`` method.
    """
    write = outfile.write
    write("<div id='footer'><div>")
    write("<a href='http://www.technorati.com/about/'>About Technorati</a>")
    write("| <a href='http://www.technorati.com/about/contact.html' title='Technorati Contact Info'>Contact</a>")
    write("| <a href='http://www.technorati.com/legal/privacy.html' title='Technorati Privacy Policy'>Privacy</a>")
    write("| <a href='http://www.technorati.com/legal/TOS.html' title='Technorati Terms of Service'>Terms of Service</a>")
    write("| <a href='http://www.technorati.com/developers/'>Developers</a>")
    write("| <a href='http://www.technorati.com/help/using-technorati.html'>Help</a></div>")
    # The copyright sign is emitted as a numeric character reference so the
    # output stays pure ASCII: a raw non-ASCII literal depends on the source
    # file's encoding and on the encoding used when the file is written,
    # neither of which is tied to the charset the page declares.
    write("<div><span id='copyright'>&#169; 2004 Technorati, Inc.</span>")
    write("<span id='badges'><a rel='license' href='http://creativecommons.org/licenses/by-nc/2.0/'>CC by-nc</a> |")
    write("<a href='http://validator.w3.org/check/referer'>XHTML Strict</a> | <a href='http://jigsaw.w3.org/css-validator/check/referer'>CSS</a>")
    write("</span>")
    write("</div>")
    write("</div>")
    write("</body>")
def save_html(title, file, counts, inbound, weblogs):
    """Write the complete HTML report to the path ``file``.

    Args:
        title: Passed to ``make_header`` and ``make_body``.
        file: Path of the output file; it is opened in ``'w'`` mode, so any
            existing content is replaced.
        counts: Passed through to ``make_body``.
        inbound: Passed through to ``make_body``.
        weblogs: Passed through to ``make_body``.
    """
    outfile = open(file, 'w')
    # try/finally guarantees the handle is closed even when one of the
    # writers raises part-way through the report.
    try:
        outfile.write("<html xmlns='http://www.w3.org/1999/xhtml'>")
        make_header(outfile, title)
        make_body(outfile, title, counts, inbound, weblogs)
        make_footer(outfile)
        outfile.write("</html>")
    finally:
        outfile.close()
def run(url, outputfile):
    """Query for ``url``, rank the outbound weblogs and save the HTML report.

    Args:
        url: URL handed to ``perform_query``; also used as the report title.
        outputfile: Path handed to ``save_html``.
    """
    query = perform_query(url)
    inbound_blogs = query['inbound']
    linked_blogs = query['weblogs']
    ranking = sort_by_value_and_links(query['counts'], linked_blogs)
    save_html(url, outputfile, ranking, inbound_blogs, linked_blogs)
def usage():
    """Write the one-line command-line usage message to standard error."""
    # sys.stderr.write behaves the same on Python 2 and 3; the chevron form
    # of print is Python 2-only syntax.
    sys.stderr.write("Usage: python inandout.py -u url -o outputfile\n")
def main():
    """Parse the command line and produce the report.

    Recognised options are ``-h``/``--help``, ``-u``/``--url`` and
    ``-o``/``--output``.  An unrecognised option prints the usage message and
    exits with status 2; the help option prints it and exits normally.
    Without options the built-in default URL and output file are used.
    """
    short_flags = "hu:o:"
    long_flags = ["help", "url=", "output="]
    try:
        parsed, _positional = getopt.getopt(sys.argv[1:], short_flags, long_flags)
    except getopt.GetoptError:
        # Print help information and exit.
        usage()
        sys.exit(2)

    target_url = "http://www.sifry.com/alerts/"
    destination = "out.html"
    for flag, value in parsed:
        if flag in ("-h", "--help"):
            usage()
            sys.exit()
        elif flag in ("-u", "--url"):
            target_url = value
        elif flag in ("-o", "--output"):
            destination = value

    run(target_url, destination)
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
# [Created with py2html Ver:0.62]