
import re
import cPickle
import nltk
import networkx as nx
import twitter

# Retweet-attribution pattern, matched case-insensitively.
#   group 1: the marker, "RT" or "via" (in any letter case)
#   group 2: the run of one or more "@handle" tokens that follows, including
#            whatever non-word characters (spaces, ":", ",") precede each one
rt_patterns = re.compile(r"(RT|via)((?:\b\W*@\w+)+)", re.IGNORECASE)

mg = nx.DiGraph()
eg = nx.DiGraph()

mtweets = cPickle.load(open("versus.madonna.tweets.pickle"))
etweets = cPickle.load(open("versus.elton.tweets.pickle"))

print "Loaded tweets"

def get_rt_sources(tweets):
    """Return the @handles credited as retweet sources in one tweet's text.

    ``tweets`` is the text of a single tweet (the parameter name is kept for
    backward compatibility). Every match of the module-level ``rt_patterns``
    contributes the handles found in its second group.

    Returns a list of strings such as ``["@alice", "@bob"]`` in order of
    appearance; the list is empty when the text carries no attribution.

    The marker group ("RT"/"via") is discarded positionally rather than by
    comparing against the literal strings: the pattern matches the marker
    case-insensitively, so a textual comparison lets "rt" or "VIA" leak into
    the result. Each handle is returned separately and without the
    punctuation that may precede it (e.g. "RT: @a, @b" gives "@a" and "@b").
    """
    sources = []
    for _marker, mentions in rt_patterns.findall(tweets):
        # ``mentions`` is the whole run of handles plus separators; pull out
        # the individual "@name" tokens.
        sources.extend(re.findall(r"@\w+", mentions))
    return sources

def process_tweets(tweets, graph):
    """Add one edge to ``graph`` per retweet attribution found in ``tweets``.

    ``tweets`` is an iterable of mappings that provide the keys "text",
    "from_user" and "id". For each tweet, every source returned by
    get_rt_sources() for its "text" yields an edge
    ``source -> tweet["from_user"]`` whose ``tweet_id`` attribute is the
    tweet's "id". Tweets without any retweet source add nothing.

    ``graph`` is modified in place; nothing is returned.
    """
    for tweet in tweets:
        for rt_source in get_rt_sources(tweet["text"]):
            # Pass the attribute by keyword: networkx 1.x and 2.x both accept
            # this form, whereas a positional attribute dict is only accepted
            # by the 1.x signature.
            graph.add_edge(rt_source, tweet["from_user"], tweet_id=tweet["id"])

def write_dot_file(graph,fname):
    """Write ``graph`` to the file ``fname`` in Graphviz DOT format.

    networkx's own writer is tried first. If it raises ImportError
    (presumably because its optional Graphviz binding is not installed), a
    minimal ``strict digraph`` is written by hand instead: one
    ``"src" -> "dst" [tweet_id=...]`` statement per edge, with non-ASCII
    characters replaced by "?".

    Every edge is expected to carry a ``tweet_id`` attribute.
    """
    try:
        # NOTE(review): newer networkx releases appear to expose write_dot
        # under nx.drawing.nx_pydot / nx.drawing.nx_agraph -- confirm against
        # the installed version.
        nx.drawing.write_dot(graph, fname)
    except ImportError:
        edge_lines = ['"%s" -> "%s" [tweet_id=%s]' % (n1, n2, graph[n1][n2]['tweet_id'])
                      for n1, n2 in graph.edges()]
        # Force plain ASCII so the file can be written without an encoding.
        ascii_lines = [line.encode('ascii', 'replace') for line in edge_lines]
        # ``with`` closes the file even if the write fails part-way through.
        with open(fname, "w") as dot_file:
            dot_file.write('strict digraph {\n%s\n}' % (';\n'.join(ascii_lines),))


process_tweets(mtweets,mg)

print "Number of nodes for Madonna tweets:",mg.number_of_nodes()
print "Number of edges for Madonna tweets:",mg.number_of_edges()

process_tweets(etweets,eg)

print "Number of nodes for Elton John tweets:",eg.number_of_nodes()
print "Number of edges for Elton John tweets:",eg.number_of_edges()

write_dot_file(mg,"versus.madonna.graph.dot")
write_dot_file(eg,"versus.elton.graph.dot")
