Skip to content
Snippets Groups Projects
Commit f4ed460e authored by Iiro Kumpulainen
Browse files

Upload New File

parent ae4563c6
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
``` python
import tweepy
from datetime import datetime
# --- Collection settings -------------------------------------------------
print_tweets = False   # when True, echo every collected tweet's text
n_tweets = 10000       # total number of tweets to gather
max_results = 100      # value passed as max_results on each search call

# --- Twitter API handles -------------------------------------------------
# The credential names below (consumer_key, consumer_secret, access_token,
# access_token_secret, bearer_token) are not defined in this cell and must
# already exist in the session.
auth = tweepy.OAuth1UserHandler(
    consumer_key,
    consumer_secret,
    access_token,
    access_token_secret,
)
api = tweepy.API(auth)
client = tweepy.Client(bearer_token)

# --- Search parameters and loop state ------------------------------------
query = "#metoo"
tweet_fields = [
    'attachments',
    'author_id',
    'text',
    'in_reply_to_user_id',
    'referenced_tweets',
    'entities',
]
next_token = None  # no pagination token before the first request
end_time = datetime(year=2022, month=5, day=28)  # passed as end_time to the search
all_tweets = []    # accumulates the tweet objects across pages
# Page through the recent-search results until n_tweets tweets have been
# collected or there are no further pages.
# This endpoint/method returns Tweets from the last seven days
while len(all_tweets) < n_tweets:
    response = client.search_recent_tweets(
        query,
        tweet_fields=tweet_fields,
        max_results=max_results,
        next_token=next_token,
        end_time=end_time,
    )
    tweets = response.data
    # No tweets on this page: nothing more to collect.
    if tweets is None:
        break
    for tweet in tweets:
        all_tweets.append(tweet)
        if print_tweets:
            print(tweet.text)
            print("-------------------")
        # Stop mid-page so the total never exceeds n_tweets.
        if len(all_tweets) == n_tweets:
            break
    if print_tweets:
        print(len(all_tweets), "/", n_tweets)
    else:
        # "\r" returns to the start of the line so the counter is overwritten
        # in place instead of printing one line per page.
        print("\r", len(all_tweets), "/", n_tweets, end="")
    # Continue only while the response metadata offers a next page.
    if 'next_token' in response.meta:
        next_token = response.meta['next_token']
    else:
        break
```
%% Output
10000 / 10000
%% Cell type:code id: tags:
``` python
import json
# Convert every collected tweet to a plain dict so it can be JSON-encoded.
# The conversion happens before the file is opened, so an error here cannot
# leave behind a truncated tweets.txt.
tweet_dicts = []
for tweet in all_tweets:
    dict_tweet = dict(tweet)
    # Referenced tweets are objects rather than plain dicts; convert each one
    # so json.dumps can serialise them.
    if tweet.referenced_tweets is not None:
        dict_tweet['referenced_tweets'] = [
            dict(r_tweet) for r_tweet in tweet.referenced_tweets
        ]
    tweet_dicts.append(dict_tweet)

# Write the whole collection as a single JSON array in one call.
with open("tweets.txt", "w") as file:
    file.write(json.dumps(tweet_dicts))
```
%% Cell type:code id: tags:
``` python
import json
# Read the saved tweets back from disk and parse them from JSON.
with open("tweets.txt", "r") as file:
    raw_json = file.read()
all_tweets_json = json.loads(raw_json)
```
%% Cell type:code id: tags:
``` python
import pickle
from create_enron_network import tokenize_text, get_labels
from edgelabelgraph import EdgeLabelGraph
# Build a graph with one (author, referenced user) edge per mention/reply,
# labelled with the lower-cased hashtags of the tweet, then pickle it.
G = EdgeLabelGraph()
for tweet in all_tweets_json:
    author = tweet['author_id']
    # Use hashtags as labels; tweets without hashtag entities are skipped.
    if not ('entities' in tweet and 'hashtags' in tweet['entities']):
        continue
    entities = tweet['entities']
    labels = {hashtag['tag'].lower() for hashtag in entities['hashtags']}
    # Rebuild the set for every tweet: a tweet without mentions must start
    # empty rather than reuse (or fail on) the previous tweet's users.
    referenced_users = {int(user['id']) for user in entities.get('mentions', [])}
    # Also link the author to the user being replied to, when there is one.
    reply_to = tweet.get('in_reply_to_user_id')
    if reply_to is not None:
        referenced_users.add(reply_to)
    for user in referenced_users:
        # Skip self-references so no self-loop edge is added.
        if user != author:
            G.add_edge_with_labels((author, user), labels)

print(G.number_of_nodes())
print(G.number_of_edges())
print(G.density())

# Persist the finished graph for later use.
with open("tweets_graph.pkl", "wb") as file:
    pickle.dump(G, file)
```
%% Output
8077
9429
1.1673888820106475
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment