#!/usr/bin/python
# -*- coding: utf-8 -*-
#
#
import sys, time, json, cPickle
from datetime import datetime
from twitter__login import login as twitter_login
from twitter__util import makeTwitterRequest 
import twitter_text
import twitter
import random

#
#
def handleHTTPError(err=None,waitT=600):
    """Decide how to react to an HTTP error raised by the Twitter client.

    err   -- the caught error; must expose the HTTP status as ``err.e.code``
             (callers in this file pass twitter.api.TwitterHTTPError).
    waitT -- seconds to sleep before returning for the "back off" codes.

    Returns True when the status code is one this function knows about
    (401, or 402/403/502/503 after sleeping ``waitT`` seconds) and False
    for any other code, leaving the decision to the caller.
    """
    assert err is not None
    status = err.e.code
    # NOTE: single-argument print(...) behaves the same as the print
    # statement on Python 2 and is also valid on Python 3.
    if status == 401:
        # probably ought to just skip this item
        print("Twitter 401 Error, resource is protected.")
        return True
    if status in (402, 403, 502, 503):
        print("Twitter %d Error, sleep for %s" % (status, str(waitT)))
        time.sleep(waitT)
        return True
    return False


#
# This function is right out of the book; it just builds lists of the entities
#
def getEntities(twt):
    """Extract mentions, hashtags and URLs from a tweet's text.

    twt -- a mapping with a 'text' entry holding the tweet body.

    Returns a dict with the keys 'user_mentions', 'hashtags' and 'urls',
    each holding a list of the items produced by the corresponding
    twitter_text extractor call. Hashtag items have their 'hashtag' key
    renamed to 'text'.
    """
    ex = twitter_text.Extractor(twt['text'])

    mentions = list(ex.extract_mentioned_screen_names_with_indices())

    tags = []
    for tag in ex.extract_hashtags_with_indices():
        # expose the tag value under 'text' instead of 'hashtag'
        tag['text'] = tag.pop('hashtag')
        tags.append(tag)

    links = list(ex.extract_urls_with_indices())

    return {
        'user_mentions': mentions,
        'hashtags': tags,
        'urls': links,
    }


def queryTrends(woeID=1):
    """Fetch the trend list for a location id from Twitter.

    woeID -- location identifier handed to the trends endpoint (default 1).

    Returns whatever the trends call returns, or an empty list when the
    call raises twitter.api.TwitterHTTPError. Errors that
    handleHTTPError() does not recognise are printed.
    """
    tTrends = twitter.Twitter()
    if not tTrends:
        return []
    try:
        return tTrends.trends._(woeID)()
    except twitter.api.TwitterHTTPError as e:
        # "except ... as" is accepted by Python 2.6+ and Python 3;
        # the comma form only parses on Python 2.
        if not handleHTTPError(err=e):
            print("Twitter Error: %s" % (e,))
    return []

#
# The keys that you might extract include:
# "query", "name", "url", "events", "promoted_content"
#
def extractTrendValues(trendList=None, key="query"):
    """Pull one field out of every trend in a trends response.

    trendList -- sequence whose first element is a mapping with a
                 'trends' entry (an iterable of per-trend mappings).
    key       -- the per-trend field to collect.

    Returns the list of ``trend[key]`` values, or [] when trendList is
    empty or None.
    """
    if not trendList:
        return []
    return [trend[key] for trend in trendList[0]['trends']]


def makeRequest(c=None, qs="", page=None, ps=250, report=False, calls=0):
    """Run one search request, retrying on recognised Twitter HTTP errors.

    c      -- connection object exposing ``search(q=, rpp=, page=)``.
    qs     -- query string; must not be empty.
    page   -- page number forwarded to the search call.
    ps     -- page size, forwarded as ``rpp``.
    report -- when true, print the page number and item count.
    calls  -- number of retries already made (used by the recursion).

    Returns the search response, or [] when the response is empty or the
    retry budget (more than two earlier retries) is exhausted. HTTP
    errors that handleHTTPError() does not recognise are re-raised.
    """
    # Compare by value: "is not" tests identity and only works for the
    # empty string by accident of interning.
    assert c is not None and qs != ""
    r = []
    try:
        r = c.search(q=qs, rpp=ps, page=page)
        if r:
            items = len(r['results'])
            if report:
                # %s (not %d) so the default page=None does not raise
                print("\tpage: %s, got %d items" % (page, items))
        else:
            r = []
    except twitter.api.TwitterHTTPError as err:
        if calls > 2:
            return r
        if handleHTTPError(err=err):
            # retry on a fresh connection
            connection = twitter.Twitter(domain="search.twitter.com")
            r = makeRequest(c=connection, qs=qs, page=page, ps=ps,
                            report=report, calls=(calls + 1))
        else:
            # bare raise keeps the original traceback
            raise
    return r


def queryTweets(qs="", pages=10, ps=250, report=False):
    """Collect up to ``pages`` pages of search results for a query.

    qs     -- query string; must not be empty.
    pages  -- number of pages to request (pages 1..pages).
    ps     -- page size forwarded to makeRequest().
    report -- forwarded to makeRequest() for per-page progress output.

    Returns the list of non-empty page responses. Stops early once two
    consecutive collected pages contain zero items. Sleeps two seconds
    after each collected page.
    """
    # Compare by value; identity against a literal is unreliable.
    assert qs != ""
    results = []
    last_count = -1
    connection = twitter.Twitter(domain="search.twitter.com")
    if not connection:
        print("Error: queryTweets(): CONNECTION object None: \"%s\"" % (qs,))
        # nothing can be fetched without a connection
        return results
    if not qs:
        return results
    for p in range(1, pages + 1):
        r = makeRequest(c=connection, qs=qs, page=p, ps=ps,
                        report=report, calls=0)
        if not r:
            continue
        items = len(r['results'])
        if last_count == 0 and items == 0:
            print("\tzeros: got zero items twice, exiting")
            return results
        last_count = items
        results.append(r)
        time.sleep(2)
    return results


def getResultsList(pageList=None):
    """Flatten a list of page responses into one list of results.

    pageList -- iterable of mappings, each with a 'results' entry.

    Returns every item of every page's 'results', in page order.
    """
    assert pageList is not None
    flattened = []
    for page in pageList:
        flattened.extend(page['results'])
    return flattened


#
# This simply pickles the data that was collected. The nice thing here
# is that this will time and datestamp the file. You can run this program
# seconds apart and the files won't collide. This could be good for a
# time series analysis. Of course, you might really want this in a DB or
# structured some other way.
#
def saveTwitterData(fname="twitter_trends",tData=None):
    """Pickle ``tData`` into a file named ``<fname>.<date>_<time>.pickle``.

    fname -- file name prefix.
    tData -- the object to pickle; nothing is written when it is falsy.

    The stamp has one-second resolution (``YYYY-MM-DD_HHMMSS``).

    Returns True when the data was written, False when there was nothing
    to save or the output file could not be opened.
    """
    if not tData:
        return False
    stamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
    fn = "%s.%s.pickle" % (fname, stamp)
    try:
        f = open(fn, "wb")
    except IOError:
        # open() raises instead of returning a falsy value, so the
        # failure has to be caught for this message to be reachable.
        print("Error: saveTwitterData(), could not open file.")
        return False
    # "with" closes the file even if pickling raises.
    # NOTE: pickle files should only ever be loaded from trusted sources.
    with f:
        cPickle.dump(tData, f)
    return True



def main(argv):
    """Fetch current trends, search tweets for one random trend, save them.

    argv -- command-line arguments (currently unused).

    Steps: query the trends, pick one of the first ten at random, fetch
    up to ten pages of matching tweets, attach extracted entities to
    each tweet, dump the result as JSON to stdout and pickle it to disk.
    """
    print("Fetch trending data")
    trendData = queryTrends()
    print("Convert trend data to list of search terms")
    queryTermList = extractTrendValues(trendData, key="query")
    # the 'name' value is printing friendly
    printTermList = extractTrendValues(trendData, key="name")
    if not queryTermList:
        # queryTrends() returns [] on an HTTP error; nothing to search for
        print("No trends available, nothing to query.")
        return
    print("Querying ...")
    # choose among the first ten trends, or fewer if fewer came back
    i = random.randint(0, min(len(queryTermList), 10) - 1)
    qt = queryTermList[i]
    pt = printTermList[i].encode('utf-8')
    print("Query for: \"%s\"" % (pt,))
    tPages = queryTweets(qs=qt, pages=10, ps=100, report=True)
    tweets = getResultsList(tPages)

    for t in tweets:
        t['entities'] = getEntities(t)

    print(json.dumps(tweets, sort_keys=True, indent=4))

    print("Pickle/save the results")
    saveTwitterData(fname="TweetEntities", tData=tweets)


# Run the collection pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main(sys.argv)
