#### Source code for inandout.py ####


# Identifier string for this tool (not referenced elsewhere in this file).
NAME = "InAndOut/Technorati/Python"
# Version of this script, kept as a string.
VERSION = "0.01"

# Copyright (C) 2004 Chris Fry

# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:

# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import getopt
import sys
from pprint import pprint
from xml.sax.saxutils import escape

import technorati

def sort_by_value_and_links(d, weblogs):
    """Rank the urls in ``d`` by their count, then by inbound links.

    Args:
        d: dictionary mapping a url to its count.
        weblogs: dictionary mapping the same urls to an entry whose
            ``['weblog']['inboundlinks']`` value breaks ties between
            equal counts.

    Returns:
        A new list of ``[count, url, inboundlinks]`` lists, highest count
        first; entries with the same count are ordered by inboundlinks,
        highest first.

    Raises:
        KeyError: if a url in ``d`` has no matching entry in ``weblogs``.
    """
    ranked = [[count, url, weblogs[url]['weblog']['inboundlinks']]
              for url, count in d.items()]
    # Sort on the (count, inboundlinks) pair instead of a weighted
    # difference: the old "count * 10000000 + links" comparison put entries
    # in the wrong order once link totals differed by ten million or more,
    # and comparison functions are not accepted by list.sort on Python 3.
    ranked.sort(key=lambda entry: (entry[0], entry[2]), reverse=True)
    return ranked

def perform_query(url):
    """Collect the blogs that link to ``url`` and the other blogs they link to.

    Performs a cosmos query for ``url`` and then, once for every distinct
    inbound blog, an outbound-blogs query.

    Args:
        url: the url to run the cosmos query for.

    Returns:
        A dictionary with three entries:
            'inbound': inbound blog url -> its 'weblog' entry from the
                cosmos result.
            'weblogs': outbound blog url -> the outbound result item it
                was last seen in.
            'counts': outbound blog url -> number of times it appeared in
                the outbound lists of the inbound blogs.
    """
    technorati.findkey()
    # NOTE(review): 100 is presumably the maximum number of results to
    # request -- confirm against the technorati module.
    data = technorati.getCosmos(url, 100)
    inbound = {}
    weblogs = {}
    counts = {}
    for item in data['inbound']:
        in_url = item['weblog']['url']
        if in_url in inbound:
            # Already visited: do not query or count the same blog twice.
            continue
        inbound[in_url] = item['weblog']

        # Walk the outbound list of this inbound blog.
        for out_item in technorati.getOutboundBlogs(in_url)['outbound']:
            out_url = out_item['weblog']['url']
            if out_url == in_url:
                # Ignore the case where the blog refers to itself.
                continue
            weblogs[out_url] = out_item
            counts[out_url] = counts.get(out_url, 0) + 1

    return {'inbound': inbound, 'weblogs': weblogs, 'counts': counts}

def format_weblog(weblog, count=None):
    """Format one weblog entry as an HTML table row.

    Args:
        weblog: dictionary providing the 'url', 'name', 'lastupdate',
            'inboundblogs' and 'inboundlinks' values of the blog.
        count: optional value for a fifth cell; the cell is left out only
            when ``count`` is None (a count of 0 is still shown).

    Returns:
        The row as a string, starting with a tab and ending with a newline.

    Raises:
        KeyError: if ``weblog`` lacks one of the keys listed above.
    """
    # The values are placed inside markup, so the markup characters are
    # escaped; the url sits in a single-quoted attribute, so "'" is
    # escaped there as well.
    # NOTE(review): assumes the values arrive as plain (unescaped) text --
    # confirm against the technorati module.
    href = escape(weblog['url'], {"'": "&#39;"})
    cells = [
        "<td><a href='" + href + "'>" + escape(weblog['name']) + "</a></td> ",
        "<td>" + escape(str(weblog['lastupdate'])) + "</td> ",
        "<td>" + str(weblog['inboundblogs']) + "</td>",
        "<td>" + str(weblog['inboundlinks']) + "</td>",
    ]
    if count is not None:
        cells.append("<td>" + str(count) + "</td>")
    return "\t<tr>" + "".join(cells) + "</tr>\n"

def make_header(outfile, title):
    """Write the HTML ``<head>`` element of the report to ``outfile``.

    The head holds the content-type meta tag, the page title and the link
    to the ``technorati.css`` stylesheet.

    Args:
        outfile: object with a ``write`` method that accepts strings.
        title: text appended to the page title; markup characters in it
            are escaped so that a url containing ``&`` or ``<`` cannot
            break the document.
    """
    parts = [
        "<head><meta http-equiv='Content-Type' content='text/html; charset=iso-8859-1' />",
        "<title>Output of the In/Out Query for ",
        escape(title),
        "</title>\n",
        "<link rel='stylesheet' type='text/css' href='technorati.css'/>",
        "</head>\n",
    ]
    outfile.write("".join(parts))

def make_body(outfile, title, counts, inbound, weblogs):
    """Write the opening of ``<body>`` and the two result tables.

    Args:
        outfile: object with a ``write`` method that accepts strings.
        title: the queried url; shown in the search box and the heading.
            Markup characters and single quotes in it are escaped because
            it is placed inside single-quoted attributes.
        counts: sequence of entries whose item 0 is the count to display
            and whose item 1 is a key of ``weblogs``; rows are written in
            the order given.
        inbound: dictionary whose values are weblog entries for the first
            table.
        weblogs: dictionary mapping a url to an item whose 'weblog' entry
            is formatted for the second table.

    NOTE(review): the ``<div id='b'>`` and ``<div id='cosmbox'>`` elements
    opened here are not closed by this function.
    """
    write = outfile.write
    safe_title = escape(title, {"'": "&#39;"})

    # Page banner and search form.
    write("<body id='technorati'>")
    write("<div id='b'><h1 id='l'><a href='/'><img src='http://www.technorati.com/images/logo200407.gif'  alt='Technorati' /></a></h1>")
    write("<div id='m'><a href='http://www.technorati.com/help/using-technorati.html' title='Technorati: Help'>Help</a></div>")
    write("<form id='searchform' method='get' action='http://www.technorati.com/cosmos/search.html'><div>")
    write("<input type='hidden' name='rank' value='' />")
    write("<input id='st' type='text' name='url' size='35' maxlength='127' value='" + safe_title + "' />")
    write("<input type='submit' class='submit' value='Search' title='Technorati: Search Results' />")
    write("<label for='st'>Keyword or URL</label>")
    write("</div>")
    write("</form>")

    # First table: the blogs that link to the queried url.
    write("<div id='cosmbox' class='bodytext'>")
    write("<h2> InOut Search for <a href='" + safe_title + "'>" + safe_title + "</a></h2>")
    write("<hr style=\"color: 'blue'; height: '15'; text-align: 'left'; width: '50%'\">")
    write("<h3>Inbound Bloggers</h3>\n")
    write("<table>\n")
    write("\t<tr><th><h4>Blog Name/URL</h4></th><th><h4>Last Update</h4></th><th><h4>Inbound Blogs</h4></th><th><h4>Inbound Links</h4></th></tr>")

    for weblog in inbound.values():
        write(format_weblog(weblog))

    write("</table>\n")

    # Second table: what those blogs link to, with the count as last column.
    write("<hr style=\"color: 'blue'; height: '15'; text-align: 'left'; width: '50%'\">")
    write("<h3>Inbound Bloggers Other Connections</h3>\n")
    write("<p>Sorted by number of people who link to you and the blog and number of inbound links from the world</p>")
    write("<table>\n")
    write("\t<tr><th><h4>Blog Name/URL</h4></th><th><h4>Last Update</h4></th><th><h4>Inbound Blogs</h4></th><th><h4>Inbound Links</h4></th><th><h4>Number of People</h4></th></tr>")

    for entry in counts:
        write(format_weblog(weblogs[entry[1]]['weblog'], entry[0]))

    write("</table>\n")

def make_footer(outfile):
    """Write the fixed page footer and the closing ``</body>`` tag.

    Args:
        outfile: object with a ``write`` method that accepts strings.
    """
    fragments = (
        # Row of site links.
        "<div id='footer'><div>",
        "<a href='http://www.technorati.com/about/'>About Technorati</a>",
        "| <a href='http://www.technorati.com/about/contact.html' title='Technorati Contact Info'>Contact</a>",
        "| <a href='http://www.technorati.com/legal/privacy.html' title='Technorati Privacy Policy'>Privacy</a>",
        "| <a href='http://www.technorati.com/legal/TOS.html' title='Technorati Terms of Service'>Terms of Service</a>",
        "| <a href='http://www.technorati.com/developers/'>Developers</a>",
        "| <a href='http://www.technorati.com/help/using-technorati.html'>Help</a></div>",
        # Copyright notice and badges.
        "<div><span id='copyright'>&copy; 2004 Technorati, Inc.</span>",
        "<span id='badges'><a rel='license' href='http://creativecommons.org/licenses/by-nc/2.0/'>CC by-nc</a> |",
        "<a href='http://validator.w3.org/check/referer'>XHTML Strict</a> | <a href='http://jigsaw.w3.org/css-validator/check/referer'>CSS</a>",
        "</span>",
        "</div>",
        "</div>",
        "</body>",
    )
    # One write per fragment, in order.
    for fragment in fragments:
        outfile.write(fragment)

def save_html(title, file, counts, inbound, weblogs):
    """Write the complete HTML report to the file named ``file``.

    The file is opened for writing (an existing file is overwritten) and
    filled with the ``<html>`` wrapper, the header, the body and the footer.

    Args:
        title: passed to ``make_header`` and ``make_body``.
        file: path of the output file.
        counts: passed through to ``make_body``.
        inbound: passed through to ``make_body``.
        weblogs: passed through to ``make_body``.
    """
    outfile = open(file, 'w')
    # try/finally so the handle is closed even when one of the writers
    # raises; previously the file stayed open in that case.
    try:
        outfile.write("<html xmlns='http://www.w3.org/1999/xhtml'>")
        make_header(outfile, title)
        make_body(outfile, title, counts, inbound, weblogs)
        make_footer(outfile)
        outfile.write("</html>")
    finally:
        outfile.close()

def run(url, outputfile):
    """Query for ``url``, rank the results and save them as an HTML report.

    Args:
        url: the url handed to ``perform_query``; also used as the title
            of the report.
        outputfile: path handed to ``save_html``.
    """
    query = perform_query(url)
    linking_blogs, linked_blogs = query['inbound'], query['weblogs']
    ranking = sort_by_value_and_links(query['counts'], linked_blogs)
    save_html(url, outputfile, ranking, linking_blogs, linked_blogs)

def usage():
    """Write the one-line command-line usage summary to standard error."""
    # sys.stderr.write produces the same output as the former
    # "print >>sys.stderr" statement and works on Python 2 and Python 3.
    sys.stderr.write("Usage: python inandout.py -u url -o outputfile\n")
    
def main():
    """Parse the command line and run the query.

    Recognised options are -h/--help, -u/--url and -o/--output. Without
    -u the url "http://www.sifry.com/alerts/" is used, without -o the
    report goes to "out.html". An option getopt rejects prints the usage
    line and exits with status 2; -h prints it and exits. Positional
    arguments are ignored.
    """
    short_options = "hu:o:"
    long_options = ["help", "url=", "output="]
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_options, long_options)
    except getopt.GetoptError:
        # Unknown option or missing option argument.
        usage()
        sys.exit(2)

    # Defaults, overridden by the options below.
    outputfile = "out.html"
    url = "http://www.sifry.com/alerts/"
    for option, value in opts:
        if option in ("-h", "--help"):
            usage()
            sys.exit()
        elif option in ("-u", "--url"):
            url = value
        elif option in ("-o", "--output"):
            outputfile = value

    run(url, outputfile)

# Script entry point: run main() and exit with whatever it returns.
if __name__ == "__main__":
    raise SystemExit(main())


    



# [Created with py2html Ver:0.62]
#
# Valid HTML 4.01!