Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import itertools
import json
import time
from pathlib import Path

import matplotlib.pyplot as plt
import pandas
import requests
from pandas.core.dtypes.common import is_numeric_dtype
# Base URL of the SteamSpy "request=all" endpoint; get_all_data appends the
# page index to it.
sisu = "https://steamspy.com/api.php?request=all&page="
# There are 67 pages of data, but for the heck of it
# we're going to try to load 100 pages.
def get_all_data(iterations: int = 100):
    """Download up to ``iterations`` pages from the SteamSpy "all" endpoint.

    Pages are requested in order, starting at page 0, by appending the page
    index to the module-level ``sisu`` URL. Downloading stops at the first
    page that cannot be fetched or parsed as JSON; everything collected up
    to that point is still returned.

    Args:
        iterations: Maximum number of pages to request.

    Returns:
        A ``pandas.DataFrame`` with one row per entry of the downloaded
        pages, or an empty DataFrame when no page could be loaded (including
        ``iterations <= 0``).
    """
    pages = []
    for page in range(iterations):
        print(page)
        url = sisu + str(page)
        try:
            # The timeout keeps a stalled connection from hanging the run.
            response = json.loads(requests.get(url, timeout=30).text)
        except (requests.RequestException, ValueError) as err:
            # ValueError covers json.JSONDecodeError, i.e. a non-JSON body.
            print(err)
            break
        games = list(response.values())
        if games:
            pages.append(pandas.DataFrame(games))
        # Zero-second pause between requests; raise it to throttle the loop.
        time.sleep(0)

    if not pages:
        # Nothing was downloaded: hand back an empty frame instead of
        # failing on an unbound variable.
        return pandas.DataFrame()
    # Concatenate once at the end rather than re-copying every row per page.
    return pandas.concat(pages, ignore_index=True, sort=False)
def add_user_rating(df):
    """Add a ``Review_rating`` column holding the share of positive reviews.

    The rating of a row is ``positive / (positive + negative)``. Rows whose
    ``positive`` and ``negative`` values are both 0 get a rating of 0.

    Args:
        df: DataFrame with ``positive`` and ``negative`` columns. It is
            modified in place.

    Returns:
        The same DataFrame object, with the new column added.
    """
    def _positive_share(row):
        up, down = row.positive, row.negative
        # Guard the 0 / 0 case for entries without any reviews.
        if up == 0 and down == 0:
            return 0
        return up / (up + down)

    df["Review_rating"] = df.apply(_positive_share, axis=1)
    return df
def create_hist_plots(df):
    """Plot, save and show a log-scaled histogram for every numeric column.

    Each figure is titled ``"<column> log histogram"`` and written to
    ``images/<title>.png`` (relative to the current working directory)
    before it is shown. Non-numeric columns are skipped.

    Args:
        df: DataFrame whose numeric columns are plotted.
    """
    # Path joins with the platform separator; a literal "images\\" prefix is
    # only a directory on Windows.
    out_dir = Path("images")
    for col_name in df.columns:
        if not is_numeric_dtype(df[col_name]):
            continue
        fig = plt.figure()
        plt.hist(df[col_name], log=True)
        # An f-string also copes with column labels that are not strings.
        title = f"{col_name} log histogram"
        plt.title(title)
        # savefig does not create missing directories, so make sure it exists.
        out_dir.mkdir(parents=True, exist_ok=True)
        fig.savefig(out_dir / f"{title}.png")
        plt.show()
def replace_owner_number_with_symbol(df):
    """Shorten the thousands/millions groups in the ``owners`` column.

    Every ``",000,000"`` becomes ``" M"`` and every remaining ``",000"``
    becomes ``" k"``, e.g. ``"20,000 .. 50,000"`` -> ``"20 k .. 50 k"``.

    Args:
        df: DataFrame with a string ``owners`` column. It is modified in
            place.

    Returns:
        The same DataFrame object, with ``owners`` rewritten.
    """
    def _abbreviate(owner_range: str):
        # Millions first: ",000,000" itself contains ",000".
        return owner_range.replace(",000,000", " M").replace(",000", " k")

    df["owners"] = df["owners"].apply(_abbreviate)
    return df
def create_heat_maps(df, plot_pairs):
    """Show one ``imshow`` figure per column pair in ``plot_pairs``.

    For each pair the raw values of the two columns (one image row per
    DataFrame row, one image column per selected column) are drawn with the
    ``hot`` colormap, together with a colorbar, axis labels and a title.

    Args:
        df: DataFrame that contains every column named in ``plot_pairs``.
        plot_pairs: Iterable of ``(x, y)`` column-name pairs.
    """
    for first_col, second_col in plot_pairs:
        # A fresh figure per pair so the plots do not draw over each other.
        plt.figure()
        pair_values = df[[first_col, second_col]].values
        image = plt.imshow(pair_values, cmap='hot', interpolation='nearest', aspect='auto')
        plt.colorbar(image)
        plt.xlabel(first_col)
        plt.ylabel(second_col)
        plt.title(f"Heatmap of {first_col} vs {second_col}")
        plt.show()
def price_to_dollars(convert_df):
    """Divide the ``price`` column by 100 (cents to dollars, per the name).

    Each value is converted with ``int()`` first. A price of 0 is stored as
    the integer 0, and every other price as ``int(value) / 100``.

    Args:
        convert_df: DataFrame with a ``price`` column whose values are
            accepted by ``int()``. It is modified in place.

    Returns:
        The same DataFrame object, with ``price`` rewritten.
    """
    def _to_dollars(val):
        cents = int(val)
        if cents == 0:
            return 0
        return cents / 100

    convert_df["price"] = convert_df["price"].apply(_to_dollars)
    return convert_df
if __name__ == "__main__":
    # Fetch the first 5 pages, then run the column clean-up steps in order.
    df = get_all_data(5)
    for transform in (add_user_rating, replace_owner_number_with_symbol, price_to_dollars):
        df = transform(df)

    # Summary tables; they are computed but not printed or used below.
    summary_stats = df.describe()
    missing_counts = df.isna().sum()
    userscore_counts = df["userscore"].value_counts()

    # Every unordered pair of numeric columns (not used further in this block).
    numeric_cols = [name for name in df.columns if is_numeric_dtype(df[name])]
    plot_pairs = list(itertools.combinations(numeric_cols, 2))

    print(df.columns)
    for column in ("price", "discount", "owners"):
        print(column, df[column].unique())

    create_hist_plots(df)

    # Histogram over the owner-range labels, with a log-scaled count axis.
    plt.hist(df["owners"], log=True)
    plt.xticks(rotation='vertical')
    plt.title("Histogram of game playerbase sizes with log scale")
    plt.tight_layout()
    plt.show()

    df.to_csv("game_data.csv")
    # No-op kept from the original; presumably a breakpoint anchor.
    pass