{ "cells": [ { "cell_type": "code", "execution_count": 115, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 10000 / 10000" ] } ], "source": [ "import tweepy\n", "from datetime import datetime\n", "\n", "print_tweets = False\n", "n_tweets = 10000\n", "max_results = 100\n", " \n", "\n", "auth = tweepy.OAuth1UserHandler(\n", " consumer_key, consumer_secret, access_token, access_token_secret\n", ")\n", "\n", "api = tweepy.API(auth)\n", "client = tweepy.Client(bearer_token)\n", " \n", "query = \"#metoo\"\n", "\n", "tweet_fields = ['attachments', 'author_id', 'text', 'in_reply_to_user_id', 'referenced_tweets', 'entities']\n", "next_token = None\n", "end_time = datetime(year=2022,month=5,day=28)\n", "\n", "all_tweets = []\n", "\n", "\n", "# This endpoint/method returns Tweets from the last seven days\n", "while len(all_tweets) < n_tweets:\n", " response = client.search_recent_tweets(query, tweet_fields=tweet_fields,\n", " max_results=max_results, next_token=next_token, end_time=end_time)\n", " tweets = response.data\n", " if tweets == None:\n", " break\n", "\n", " for tweet in tweets:\n", " all_tweets.append(tweet)\n", " \n", " if print_tweets:\n", " print(tweet.text)\n", " print(\"-------------------\")\n", " \n", " if len(all_tweets) == n_tweets:\n", " break\n", "\n", " if print_tweets:\n", " print(len(all_tweets), \"/\", n_tweets)\n", " else:\n", " print(\"\\r\",len(all_tweets), \"/\", n_tweets, end=\"\")\n", "\n", " if 'next_token' in response.meta:\n", " next_token = response.meta['next_token']\n", " else:\n", " break" ] }, { "cell_type": "code", "execution_count": 116, "metadata": {}, "outputs": [], "source": [ "import json\n", "with open(\"tweets.txt\", \"w+\") as file:\n", " tweet_dicts = []\n", " for tweet in all_tweets:\n", " dict_tweet = dict(tweet)\n", " if tweet.referenced_tweets != None: \n", " dict_tweet['referenced_tweets'] = [dict(r_tweet) for r_tweet in tweet.referenced_tweets]\n", " tweet_dicts.append(dict_tweet)\n", " file.write(json.dumps(tweet_dicts))" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import json\n", "with open(\"tweets.txt\", \"r\") as file:\n", " all_tweets_json = json.load(file)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8077\n", "9429\n", "1.1673888820106475\n" ] } ], "source": [ "import pickle\n", "from create_enron_network import tokenize_text, get_labels\n", "from edgelabelgraph import EdgeLabelGraph\n", "\n", "G = EdgeLabelGraph()\n", "\n", "for tweet in all_tweets_json:\n", " \n", " author = tweet['author_id']\n", " \n", " # Use hashtags as labels\n", " if not ('entities' in tweet and 'hashtags' in tweet['entities']):\n", " continue\n", " labels = set([hashtag['tag'].lower() for hashtag in tweet['entities']['hashtags']])\n", " \n", " if 'mentions' in tweet['entities']:\n", " referenced_users = set([int(user['id']) for user in tweet['entities']['mentions']])\n", " \n", " if 'in_reply_to_user_id' in tweet:\n", " referenced_users.add(tweet['in_reply_to_user_id'])\n", " \n", " for user in referenced_users:\n", " if user != author:\n", " G.add_edge_with_labels((author, user), labels)\n", " \n", "print(G.number_of_nodes())\n", "print(G.number_of_edges())\n", "print(G.density())\n", "\n", "with open(\"tweets_graph.pkl\", \"wb\") as file:\n", " pickle.dump(G, file)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, 
"language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 4 }