# -*- coding: utf-8 -*-

import sys, time, cPickle
import json
import redis
import twitter
from twitter__login import login as twitter_login
from twitter__util import makeTwitterRequest 
from twitter__util import getUserInfo

# Default cap on how many IDs getFriendIDs / getFollowerIDs gather before
# they stop asking for further pages
LIMIT=10000

#
# Request a list of user IDs who this person is *following*
# Twitter calls the people you are following "friends"
#
def getFriendIDs(tSearch=None, screen_name=None, user_id=None, friends_limit=LIMIT):
    """Collect the IDs of the accounts a user is following ("friends").

    Pages through ``tSearch.friends.ids`` with a cursor that starts at -1.
    Paging stops when the returned ``next_cursor`` is 0, when a request
    yields an empty response, or once at least ``friends_limit`` IDs have
    been gathered.

    tSearch       -- API handle handed to makeTwitterRequest (required)
    screen_name   -- account to query; used in preference to user_id
    user_id       -- account to query when screen_name is None
    friends_limit -- stop paging once this many IDs are held; whole pages
                     are kept, so the result can be longer than the limit

    Returns the list of collected IDs (possibly empty).
    Raises twitter.api.TwitterHTTPError for any HTTP error other than 400,
    or when the single retry after a 400 fails too.
    """
    assert screen_name is not None or user_id is not None
    assert tSearch is not None

    ids = []
    cursor = -1
    while cursor != 0:
        params = dict(cursor=cursor)
        if screen_name is not None:
            params['screen_name'] = screen_name
        else:
            params['user_id'] = user_id
        try:
            response = makeTwitterRequest(tSearch, tSearch.friends.ids, **params)
        except twitter.api.TwitterHTTPError as err:
            if err.e.code != 400:
                # bare raise keeps the traceback of the failing request
                raise
            # on a 400, log in again and retry the same page once
            tSearch = twitter_login()
            response = makeTwitterRequest(tSearch, tSearch.friends.ids, **params)
        if not response:
            # empty/null response: pause briefly and give up on further pages
            time.sleep(2.0)
            break
        # sleep a small amount after each successful request
        time.sleep(5.0)
        ids.extend(response['ids'])
        cursor = response['next_cursor']
        if len(ids) >= friends_limit:
            break
    return ids


#
# Request a list of user IDs who are *followers* of this user
#
def getFollowerIDs(tSearch=None, screen_name=None, user_id=None, followers_limit=LIMIT):
    """Collect the IDs of the accounts that follow a user.

    Pages through ``tSearch.followers.ids`` with a cursor that starts at -1.
    Paging stops when the returned ``next_cursor`` is 0, when a request
    yields an empty response, or once at least ``followers_limit`` IDs have
    been gathered.

    tSearch         -- API handle handed to makeTwitterRequest (required)
    screen_name     -- account to query; used in preference to user_id
    user_id         -- account to query when screen_name is None
    followers_limit -- stop paging once this many IDs are held; whole pages
                       are kept, so the result can be longer than the limit

    Returns the list of collected IDs (possibly empty).
    Raises twitter.api.TwitterHTTPError for any HTTP error other than 400,
    or when the single retry after a 400 fails too.
    """
    assert screen_name is not None or user_id is not None
    assert tSearch is not None

    ids = []
    cursor = -1
    while cursor != 0:
        params = dict(cursor=cursor)
        if screen_name is not None:
            params['screen_name'] = screen_name
        else:
            params['user_id'] = user_id
        try:
            response = makeTwitterRequest(tSearch, tSearch.followers.ids, **params)
        except twitter.api.TwitterHTTPError as err:
            if err.e.code != 400:
                # bare raise keeps the traceback of the failing request
                raise
            # on a 400, log in again and retry the same page once
            tSearch = twitter_login()
            response = makeTwitterRequest(tSearch, tSearch.followers.ids, **params)
        if not response:
            # empty/null response: pause briefly and give up on further pages
            time.sleep(2.0)
            break
        # sleep a small amount after each successful request
        time.sleep(5.0)
        ids.extend(response['ids'])
        cursor = response['next_cursor']
        if len(ids) >= followers_limit:
            break
    return ids


#
# Request a single user profile from Twitter
#
def getUserProfile(tSearch=None, user_id=None, screen_name=None):
    """Look up one or more user profiles through ``tSearch.users.lookup``.

    tSearch     -- API handle handed to makeTwitterRequest (required)
    user_id     -- a single id, or a list/tuple of ids for a batch lookup;
                   only used when screen_name is empty
    screen_name -- a single screen name, or a list/tuple of them; takes
                   precedence over user_id when non-empty

    Batches are sent as one comma separated string.

    Returns whatever the lookup produced (callers in this file treat it as
    a list of profile dicts), or [] when the response was empty.
    Raises twitter.api.TwitterHTTPError for any HTTP error other than 400,
    or when the single retry after a 400 fails too.
    """
    assert user_id is not None or screen_name is not None
    assert tSearch is not None

    # isinstance, not "is list": the latter compares against the type object
    # itself and is never true for an actual list
    if screen_name:
        if isinstance(screen_name, (list, tuple)):
            lookup = dict(screen_name=",".join(screen_name))
        else:
            lookup = dict(screen_name=screen_name)
    elif isinstance(user_id, (list, tuple)):
        if not user_id:
            # nothing to ask for; avoid sending an empty id string
            return []
        lookup = dict(user_id=",".join([str(u) for u in user_id]))
    else:
        lookup = dict(user_id=str(user_id))

    try:
        r = makeTwitterRequest(tSearch, tSearch.users.lookup, **lookup)
    except twitter.api.TwitterHTTPError as err:
        if err.e.code != 400:
            # bare raise keeps the traceback of the failing request
            raise
        # a long wait/sleep seems to generate a time out, re-login
        tSearch = twitter_login()
        r = makeTwitterRequest(tSearch, tSearch.users.lookup, **lookup)

    if r:
        time.sleep(5.0)
        return r
    time.sleep(2.0)
    return []


#
# Given a list of user IDs, get each profile and print a simple summary
#
def getPrintProfiles(tSearch=None, uid_list=None):
    """Fetch the profile of every id in uid_list and print a short summary.

    tSearch  -- API handle passed on to getUserProfile (required)
    uid_list -- iterable of user ids (required)

    For each id the name, description, following count, follower count and
    status count are printed, or "Profile was EMPTY!" when the lookup gave
    nothing back. A blank line follows every id. Returns None.
    """
    assert tSearch is not None and uid_list is not None
    for uid in uid_list:
        profiles = getUserProfile(tSearch, uid)
        # The rest of this file treats the lookup result as a list of
        # profile dicts; tolerate a bare dict as well.
        if isinstance(profiles, dict):
            profiles = [profiles]
        if profiles:
            for profile in profiles:
                name = profile['name'].encode('utf-8')
                print("Got profile for: %s" % (name))
                if profile['description']:
                    descrip = profile['description'].encode('utf-8')
                else:
                    descrip = "<empty>"
                print("\tDescription: %s" % (descrip))
                print("\tFollowing count: %d" % (profile['friends_count']))
                print("\tFollower count: %d" % (profile['followers_count']))
                print("\tStatus updates: %d" % (profile['statuses_count']))
        else:
            print("Profile was EMPTY!")
        # blank separator line between users
        print("")
    return


#
# Take a list of user ids. Make profile requests in sets of 100 user IDs
# and save the chunks into the redis DB. Returns the number of profiles saved.
#
def getAndSaveProfiles(tSearch=None, r=None, uid_list=None, force=False):
    """Fetch and store profiles for uid_list in batches of 100 ids.

    tSearch  -- API handle passed on to getAndSaveOneProfile (required)
    r        -- redis client passed on to getAndSaveOneProfile (required)
    uid_list -- sequence of user ids (required)
    force    -- passed through unchanged to getAndSaveOneProfile

    Returns the sum of the counts reported by getAndSaveOneProfile.
    """
    assert tSearch is not None and r is not None and uid_list is not None
    batch_size = 100
    saved_total = 0
    for offset in range(0, len(uid_list), batch_size):
        batch = uid_list[offset:offset + batch_size]
        saved_total += getAndSaveOneProfile(tSearch=tSearch, r=r, uid=batch, force=force)
    return saved_total
#
# Save one profile in redis
#
def getAndSaveOneProfile(tSearch=None, r=None, uid=None, force=False):
    """Fetch profile(s) from Twitter and store them in redis.

    tSearch -- API handle passed on to getUserProfile (required)
    r       -- redis client (required)
    uid     -- a single user id, or a list/tuple of ids for a batch
    force   -- when True always fetch; when False fetch only the ids that
               have no profile stored in redis yet

    Returns the number of profiles written to redis by this call.
    """
    assert tSearch is not None and r is not None and uid is not None

    if force:
        wanted = uid
    elif isinstance(uid, (list, tuple)):
        # Check the cache one id at a time: a key built from the whole list
        # never matches anything, which made every batch a cache miss.
        wanted = [u for u in uid if not redisGetUserProfile(r, u)]
        if not wanted:
            return 0
    elif redisGetUserProfile(r, uid):
        # already stored and no refresh was requested
        return 0
    else:
        wanted = uid

    proCount = 0
    # an empty lookup result simply stores nothing
    for p in getUserProfile(tSearch=tSearch, user_id=wanted) or []:
        redisUpdateUserProfile(r, p)
        proCount += 1
    return proCount

#
# Take a list of people who this user is following. For each user in that list
# request and save the users who that one user is following. This collects two
# levels, following of following (friends of friends)
#
def getAndSaveFriendLists(tSearch=None, r=None, uid_list=None, force=False):
    """Store the friend (following) list of every positive id in uid_list.

    tSearch  -- API handle passed on to getAndSaveOneFriendList (required)
    r        -- redis client passed on to getAndSaveOneFriendList (required)
    uid_list -- iterable of user ids; ids that are not > 0 are skipped
    force    -- passed through unchanged

    Returns the list of values returned by getAndSaveOneFriendList, one per
    processed id, in input order.
    """
    assert tSearch is not None and r is not None and uid_list is not None
    return [
        getAndSaveOneFriendList(tSearch=tSearch, r=r, uid=one_id, force=force)
        for one_id in uid_list
        if one_id > 0
    ]
#
# Request and save one *following* (aka, friend) list in redis
#
def getAndSaveOneFriendList(tSearch=None, r=None, uid=None, force=False):
    """Fetch the friend (following) list of one user and store it in redis.

    tSearch -- API handle passed on to getFriendIDs (required)
    r       -- redis client (required)
    uid     -- id of the user whose friend list is wanted (required)
    force   -- when True always fetch; when False fetch only if redis holds
               no friend list for this user

    Returns the number of friend ids stored by this call, or 0 when nothing
    was fetched or the fetched list was empty.
    """
    assert tSearch is not None and r is not None and uid is not None

    if not force and redisGetFriendList(r=r, uid=uid):
        # already stored and no refresh was requested
        return 0

    following = getFriendIDs(tSearch=tSearch, user_id=uid)
    if not following:
        return 0
    redisUpdateFriendList(r=r, uid=uid, flist=following)
    # the forced path now reports the real count instead of always 0
    return len(following)


#
# Given a list of people who are following this user, move through that list
# one user at a time. For each user, request that user's follower list and
# save that list into redis. This collects followers of followers, which is
# two levels out.
#
def getAndSaveFollowerLists(tSearch=None, r=None, uid_list=None, force=False):
    """Store the follower list of every positive id in uid_list.

    tSearch  -- API handle passed on to getAndSaveOneFollowerList (required)
    r        -- redis client passed on to getAndSaveOneFollowerList (required)
    uid_list -- iterable of user ids; ids that are not > 0 are skipped
    force    -- passed through unchanged

    Returns the list of values returned by getAndSaveOneFollowerList, one
    per processed id, in input order.
    """
    assert tSearch is not None and r is not None and uid_list is not None
    return [
        getAndSaveOneFollowerList(tSearch=tSearch, r=r, uid=one_id, force=force)
        for one_id in uid_list
        if one_id > 0
    ]
#
# Request one follower list from Twitter and save that list in redis
#
def getAndSaveOneFollowerList(tSearch=None, r=None, uid=None, force=False):
    """Fetch the follower list of one user and store it in redis.

    tSearch -- API handle passed on to getFollowerIDs (required)
    r       -- redis client (required)
    uid     -- id of the user whose follower list is wanted (required)
    force   -- when True always fetch; when False fetch only if redis holds
               no follower list for this user

    Returns the number of follower ids stored by this call, or 0 when
    nothing was fetched or the fetched list was empty.
    """
    assert tSearch is not None and r is not None and uid is not None

    # The cache check must look at the *follower* list; consulting the
    # friend list skipped users whose friends had already been stored.
    if not force and redisGetFollowerList(r=r, uid=uid):
        return 0

    followers = getFollowerIDs(tSearch=tSearch, user_id=uid)
    if not followers:
        return 0
    redisUpdateFollowerList(r=r, uid=uid, flist=followers)
    # the forced path now reports the real count instead of always 0
    return len(followers)


#
# Take a profile or user id, generate user id keys, save/update the following list
# In Twitter the people you are following are also known as "friends"
#
def redisUpdateFriendList(r=None, prof=None, uid=None, flist=None):
    """Store the friend (following) list of a user in redis.

    r     -- redis client; its delete() and sadd() methods are used
    prof  -- profile dict; when truthy its 'id' names the user
    uid   -- user id, used when prof is empty
    flist -- iterable of friend ids (required)

    The key is "user_id$<id>$friend_list". The whole list is stored as ONE
    set member holding the string form of the list of id strings, e.g.
    "['1', '2']", which is the format redisGetFriendList parses.
    Returns None.
    """
    assert prof is not None or uid is not None
    assert r is not None and flist is not None

    value_type = "friend_list"
    if prof:
        user_id_key = "user_id$"+str(prof['id'])+"$"+value_type
    else:
        user_id_key = "user_id$"+str(uid)+"$"+value_type

    follow_list = [str(i) for i in flist]

    # Remove any previously stored list first: adding to the set would keep
    # the old member next to the new one, and readers only look at one.
    r.delete(user_id_key)
    # Serialise explicitly rather than relying on the client to stringify a
    # list argument (newer redis clients reject non-scalar values).
    r.sadd(user_id_key, str(follow_list))
    return


#
# Take a profile or user id, generate user id keys, save/update the follower list
# This is the list of people who are followers of this user
#
def redisUpdateFollowerList(r=None, prof=None, uid=None, flist=None):
    """Store the follower list of a user in redis.

    r     -- redis client; its delete() and sadd() methods are used
    prof  -- profile dict; when truthy its 'id' names the user
    uid   -- user id, used when prof is empty
    flist -- iterable of follower ids (required)

    The key is "user_id$<id>$follower_list". The whole list is stored as
    ONE set member holding the string form of the list of id strings, e.g.
    "['1', '2']", which is the format redisGetFollowerList parses.
    Returns None.
    """
    assert prof is not None or uid is not None
    assert r is not None and flist is not None

    value_type = "follower_list"
    if prof:
        user_id_key = "user_id$"+str(prof['id'])+"$"+value_type
    else:
        user_id_key = "user_id$"+str(uid)+"$"+value_type

    follow_list = [str(i) for i in flist]

    # Remove any previously stored list first: adding to the set would keep
    # the old member next to the new one, and readers only look at one.
    r.delete(user_id_key)
    # Serialise explicitly rather than relying on the client to stringify a
    # list argument (newer redis clients reject non-scalar values).
    r.sadd(user_id_key, str(follow_list))
    return


#
# Given a profile or a user id get the users being followed from redis server
#
def redisGetFriendList(r=None, prof=None, uid=None):
    """Read the stored friend (following) list of a user from redis.

    r    -- redis client; its smembers() method is used
    prof -- profile dict; when truthy its 'id' names the user
    uid  -- user id, used when prof is empty

    The set at "user_id$<id>$friend_list" is expected to hold the string
    form of a list (e.g. "['1', '2']"); only the first member yielded by
    the set is parsed.

    Returns a list of ints, or [] when nothing is stored or the stored list
    is empty.
    """
    assert prof is not None or uid is not None
    assert r is not None

    value_type = "friend_list"
    if prof:
        user_id_key = "user_id$"+str(prof['id'])+"$"+value_type
    else:
        user_id_key = "user_id$"+str(uid)+"$"+value_type

    members = r.smembers(user_id_key)
    if not members:
        return []

    raw = next(iter(members))
    stripped = raw.replace('[', '').replace(']', '').replace("'", '')
    # skip blank tokens so a stored empty list ("[]") yields [] rather than
    # failing in int('')
    return [int(token) for token in stripped.split(',') if token.strip()]


#
# Given a profile or a user id get the list of followers from redis server
#
def redisGetFollowerList(r=None, prof=None, uid=None):
    """Read the stored follower list of a user from redis.

    r    -- redis client; its smembers() method is used
    prof -- profile dict; when truthy its 'id' names the user
    uid  -- user id, used when prof is empty

    The set at "user_id$<id>$follower_list" is expected to hold the string
    form of a list (e.g. "['1', '2']"); only the first member yielded by
    the set is parsed.

    Returns a list of ints, or [] when nothing is stored or the stored list
    is empty.
    """
    assert prof is not None or uid is not None
    assert r is not None

    value_type = "follower_list"
    if prof:
        user_id_key = "user_id$"+str(prof['id'])+"$"+value_type
    else:
        user_id_key = "user_id$"+str(uid)+"$"+value_type

    members = r.smembers(user_id_key)
    if not members:
        return []

    raw = next(iter(members))
    stripped = raw.replace('[', '').replace(']', '').replace("'", '')
    # skip blank tokens so a stored empty list ("[]") yields [] rather than
    # failing in int('')
    return [int(token) for token in stripped.split(',') if token.strip()]


#
# Take a profile, generate screen name and user id keys, save the profile
#
def redisUpdateUserProfile(r=None, prof=None):
    """Store a profile in redis under both its screen name and its user id.

    r    -- redis client; its set() method is used
    prof -- profile dict providing 'screen_name' and 'id'

    The JSON form of prof is written to "screen_name$<name>$info.json" and
    to "user_id$<id>$info.json", in that order. Returns None.
    """
    assert r is not None and prof is not None
    kind = "info.json"
    by_name = "screen_name$" + prof['screen_name'] + "$" + kind
    by_id = "user_id$" + str(prof['id']) + "$" + kind

    # serialise once; both keys hold the same document
    document = json.dumps(prof)
    for key in (by_name, by_id):
        r.set(key, document)
    return


#
# Given a screen name or a user id get the profile from redis server
#
def redisGetUserProfile(r=None, user_id=None, screen_name=None):
    """Read a stored profile from redis by screen name or by user id.

    r           -- redis client; its get() method is used
    user_id     -- looked up under "user_id$<id>$info.json" when
                   screen_name is empty
    screen_name -- when truthy, looked up under
                   "screen_name$<name>$info.json"

    Returns the decoded JSON value, or {} when nothing is stored.
    """
    assert screen_name is not None or user_id is not None
    assert r is not None
    kind = "info.json"
    if not screen_name:
        lookup_key = "user_id$" + str(user_id) + "$" + kind
    else:
        lookup_key = "screen_name$" + screen_name + "$" + kind
    stored = r.get(lookup_key)
    return json.loads(stored) if stored else {}


#
# This pickles data to save the state of the collection in case of a crash
# A checkpoint is just a little scratch space that keeps track of where things
# are in the crawl of friends and followers.
#
def saveCheckpointData(fname="crawl", uname="username", tData=None):
    """Pickle tData into "<fname>-<uname>.checkpoint.pickle".

    fname -- file name prefix (may include a directory)
    uname -- user name that becomes part of the file name
    tData -- object to pickle (required)

    Returns True once the data has been written. Returns False, after
    printing an error, when the file cannot be opened. Errors raised while
    pickling are not caught.
    """
    assert tData is not None
    fn = "%s-%s.checkpoint.pickle"%(fname,uname)
    # open() signals failure by raising, never by returning a false value,
    # so the failure has to be caught for the False result to be reachable
    try:
        f = open(fn,"wb")
    except (IOError, OSError):
        print("Error: saveCheckpointData(), could not open file.")
        return False
    # the with-block closes the file even if dumping fails
    with f:
        cPickle.dump(tData,f)
    return True
#
# Read pickle data to reload after a crash. This only happens if you use the
# -restart flag on the command line
#
def loadCheckpointData(fname="crawl", uname="username"):
    """Load the object pickled in "<fname>-<uname>.checkpoint.pickle".

    fname -- file name prefix (may include a directory)
    uname -- user name that is part of the file name

    Returns the unpickled object. Returns [], after printing an error, when
    the file cannot be opened. Errors raised while unpickling are not
    caught.
    """
    fn = "%s-%s.checkpoint.pickle"%(fname,uname)
    # open() signals failure by raising, never by returning a false value,
    # so the failure has to be caught for the [] result to be reachable.
    # Binary mode matches the "wb" mode the checkpoint is written with.
    try:
        f = open(fn,"rb")
    except (IOError, OSError):
        print("Error: loadCheckpointData(), could not open file.")
        return []
    with f:
        # NOTE: unpickling can execute arbitrary code; only load checkpoint
        # files that this program wrote itself.
        return cPickle.load(f)


#
# This collects a lot of data, it runs through all friends and followers of
# the person specified with "uid" and collects all profiles and all user ids
# of each friend and follower, this is a two level crawl
#
def collectFriendsAndFollowers(tSearch=None, db=None, uid=None, force=False):
    assert tSearch is not None and db is not None and uid is not None
    # first collect and save for people who are being followed (aka, "Friends")
    following_list = getFriendIDs(tSearch=tSearch,user_id=uid)
    pc = getAndSaveProfiles(tSearch=tSearch, r=db, uid_list=following_list, force=force)
    print "\tFollowing profiles:",pc
    time.sleep(15.0)
    fl = getAndSaveFriendLists(tSearch=tSearch, r=db, uid_list=following_list, force=force)
    print "\tFollowing:",len(fl)
    print "\tFollowing counts:",fl
    time.sleep(15.0)
    # now collect and save for followers
    follower_list = getFollowerIDs(tSearch=tSearch,user_id=uid)
    pc = getAndSaveProfiles(tSearch=tSearch, r=db, uid_list=follower_list, force=force)
    time.sleep(15.0)
    print "\tFollower profiles:",pc
    fl = getAndSaveFollowerLists(tSearch=tSearch, r=db, uid_list=follower_list, force=force)
    print "\tFollower:",len(fl)
    print "\tFollower counts:",fl
    time.sleep(15.0)


def parse_params(argv):
    """Parse the command line into a settings dict.

    argv -- full argument list; element 0 (the program name) is ignored

    Recognised flags:
      -user <name>  sets 'user_name' (the last occurrence wins)
      -one          sets 'one' to True
      -restart      sets 'restart' to True
    Anything else is ignored. A trailing "-user" with no value leaves
    'user_name' unchanged instead of raising IndexError.

    Returns {'user_name': str, 'one': bool, 'restart': bool}.
    """
    user_name = ""
    one = False
    restart = False
    args = iter(argv[1:])
    for param in args:
        if param == "-user":
            # consume the following argument as the value, if there is one
            user_name = next(args, user_name)
        elif param == "-one":
            one = True
        elif param == "-restart":
            restart = True
    return {'user_name':user_name, 'one':one, 'restart':restart }


def usage(prog):
    """Print the command line synopsis for prog, then exit with status 0."""
    synopsis = "USAGE: %s -user <twitter_username> [-one] [-restart]" % prog
    print(synopsis)
    sys.exit(0)


def _crawlPending(db, pending, start_uname, following_ids, follower_ids):
    """Crawl every user id in pending, checkpointing after each one.

    pending is following_ids or follower_ids itself; ids are popped from it
    so that the checkpoint written after each user reflects what is left.
    """
    while pending:
        user = pending.pop()
        # a fresh login is made for every user
        t = twitter_login()
        prof = getUserProfile(tSearch=t,user_id=user)
        if prof:
            p = prof[0]
            sname = p['screen_name'].encode('utf-8')
            name = p['name'].encode('utf-8')
            print("Collecting for: \"%s\" name: %s (%d)"%(sname,name,user))
            collectFriendsAndFollowers(tSearch=t, db=db, uid=user)
        else:
            # an empty lookup must not abort the whole crawl
            print("Skipping user %s: profile lookup returned nothing"%(user))
        saveCheckpointData(uname=start_uname,tData=[following_ids,follower_ids])


def main(argv):
    """Command line entry point of the friend/follower crawler.

    argv -- argument list in sys.argv layout (program name first)

    Looks up and stores the starting user's profile, builds the lists of
    ids to crawl (from Twitter, from the single starting user with -one, or
    from a checkpoint with -restart), then crawls the friends list followed
    by the followers list, saving a checkpoint after every user.

    Exits through usage() when no user name was given, with status 0 when a
    restart checkpoint cannot be loaded, and with status 1 when the
    starting user's profile cannot be fetched.
    """
    # use the argv that was passed in rather than reaching for sys.argv
    prog = argv[0] if argv else sys.argv[0]
    if len(argv) < 3:
        print("ERROR: Must specify a twitter username")
        usage(prog)

    params = parse_params(argv)
    if( not params['user_name'] ):
        print("ERROR: Must specify a twitter username")
        usage(prog)

    # Login to twitter and open the redis DB
    t = twitter_login()
    db = redis.Redis()

    # Collect the information for the one starting user
    start_uname = params['user_name'].encode('utf-8')
    prof = getUserProfile(tSearch=t,screen_name=start_uname)
    if( not prof ):
        # previously this fell over with an IndexError on prof[0]
        print("ERROR: Could not fetch the profile for \"%s\""%(start_uname))
        sys.exit(1)
    start_prof = prof[0]
    start_name = start_prof['name'].encode('utf-8')
    start_uid = start_prof['id']
    getAndSaveOneProfile(tSearch=t,r=db,uid=start_uid)

    if( not params['restart'] ):
        # the "real" way to do this is to crawl all the friends and followers
        # of the starting point - this would produce three full levels beyond
        # the starting person.
        if( not params['one'] ):
            print("Starting crawl with user: \"%s\" name: %s (%d)"%(start_uname, start_name, start_uid))
            following_ids = getFriendIDs(t,screen_name=params['user_name'])
            follower_ids = getFollowerIDs(t,screen_name=params['user_name'])
        else:
            print("Collecting for one user: \"%s\" name: %s (%d)"%(start_uname, start_name, start_uid))
            # one, means just do the one user we enter, not multiple levels
            # actually this gets two levels, this person and this users friends/followers
            following_ids = [start_uid]
            # if following and followers were the same, then only need to do one
            follower_ids = []
    else:
        # a restart condition
        print("Attempting to restart for: \"%s\" name: %s (%d)"%(start_uname, start_name, start_uid))
        data = loadCheckpointData(uname=start_uname)
        if( data ):
            following_ids = data[0]
            follower_ids = data[1]
        else:
            print("Error: Could not load checkpoint for successful restart.")
            sys.exit(0)

    saveCheckpointData(uname=start_uname, tData=[following_ids,follower_ids])
    print("Collecting %d friends for: \"%s\" name: %s (%d)"%(len(following_ids), start_uname, start_name, start_uid))
    _crawlPending(db, following_ids, start_uname, following_ids, follower_ids)

    # blank separator line between the two phases
    print("")
    print("Collecting %d followers for \"%s\" name: %s (%d)"%(len(follower_ids), start_uname, start_name, start_uid))
    _crawlPending(db, follower_ids, start_uname, following_ids, follower_ids)
    print("Done")

# Run the crawler only when this file is executed as a script; passes the
# full command line (program name included) to main()
if __name__ == '__main__':
    main(sys.argv)   
