Skip to content
Snippets Groups Projects
Commit f2053b0c authored by Max Väistö's avatar Max Väistö
Browse files

Initial commit

parent 6cb537bd
No related branches found
No related tags found
No related merge requests found
Showing
with 5139 additions and 0 deletions
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (pythonProject)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/pythonProject.iml" filepath="$PROJECT_DIR$/.idea/pythonProject.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.11 (pythonProject)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
This diff is collapsed.
api_exploration/images/Review_rating log histogram.png

11.6 KiB

api_exploration/images/appid log histogram.png

10.5 KiB

api_exploration/images/average_2weeks log histogram.png

13.9 KiB

api_exploration/images/average_forever log histogram.png

12.9 KiB

api_exploration/images/ccu log histogram.png

11.1 KiB

api_exploration/images/median_2weeks log histogram.png

13.4 KiB

api_exploration/images/median_forever log histogram.png

12.3 KiB

api_exploration/images/negative log histogram.png

11.9 KiB

api_exploration/images/positive log histogram.png

11.7 KiB

api_exploration/images/price log histogram.png

11.3 KiB

api_exploration/images/userscore log histogram.png

11.3 KiB

import itertools
import requests
import json
import pandas
import time
import matplotlib.pyplot as plt
from pandas.core.dtypes.common import is_numeric_dtype
# Base URL of the SteamSpy "request=all" endpoint; get_all_data appends the
# page number to it to build each request URL.
sisu = "https://steamspy.com/api.php?request=all&page="
# There are 67 pages of data, but for the heck of it
# we're going to try to load 100 pages.
def get_all_data(iterations: int = 100, delay: float = 0):
    """Download up to *iterations* pages of game records and combine them.

    Pages are requested in order starting from page 0 by appending the page
    number to the module-level ``sisu`` URL. Downloading stops at the first
    page that cannot be fetched or decoded as JSON; the pages gathered so far
    are still returned.

    Args:
        iterations: Maximum number of pages to request.
        delay: Seconds to wait after each successful page (default 0, i.e.
            no pause).

    Returns:
        A ``pandas.DataFrame`` with one row per game and a fresh 0..n-1
        index. The frame is empty when no page could be loaded.
    """
    frames = []
    for page in range(iterations):
        print(page)
        url = sisu + str(page)
        try:
            # A timeout keeps a stalled connection from hanging the run.
            response = json.loads(requests.get(url, timeout=30).text)
        except (requests.RequestException, ValueError) as error:
            # ValueError covers json.JSONDecodeError, raised when the body is
            # not valid JSON.
            print(error)
            break
        # Each page is a mapping; only its values (the game records) are kept.
        frames.append(pandas.DataFrame(list(response.values())))
        time.sleep(delay)

    if not frames:
        # Nothing was downloaded: return an empty frame rather than failing on
        # an unbound variable.
        return pandas.DataFrame()
    # A single concat avoids re-copying the accumulated rows on every page.
    return pandas.concat(frames, ignore_index=True, sort=False)
def add_user_rating(df):
    """Add a ``Review_rating`` column: the share of positive reviews.

    The rating is ``positive / (positive + negative)``. Rows where both
    counts are zero get a rating of 0 instead of a division by zero.

    Args:
        df: DataFrame with numeric ``positive`` and ``negative`` columns.
            It is modified in place.

    Returns:
        The same DataFrame object, with ``Review_rating`` added.
    """
    positive = df["positive"]
    negative = df["negative"]
    # Column-wise arithmetic replaces the row-by-row apply: it is faster and
    # also works when the frame has no rows.
    unreviewed = (positive == 0) & (negative == 0)
    df["Review_rating"] = (positive / (positive + negative)).mask(unreviewed, 0)
    return df
def create_hist_plots(df):
    """Plot and save a log-scaled histogram for every numeric column of *df*.

    Each figure is titled ``"<column> log histogram"``, written to
    ``images/<title>.png`` relative to the current working directory, shown,
    and then closed.

    Args:
        df: DataFrame whose numeric columns are plotted; other columns are
            skipped.
    """
    # Local import so this function brings its own dependency into scope.
    from pathlib import Path

    # Build the path with pathlib instead of a hard-coded "\\" separator so
    # the files land inside the images directory on every platform, and make
    # sure that directory exists before saving.
    image_dir = Path("images")
    image_dir.mkdir(parents=True, exist_ok=True)

    for col_name in df.columns:
        if not is_numeric_dtype(df[col_name]):
            continue
        fig = plt.figure()
        plt.hist(df[col_name], log=True)
        title = f"{col_name} log histogram"
        plt.title(title)
        fig.savefig(image_dir / f"{title}.png")
        plt.show()
        # Release the figure so open figures do not pile up when the backend
        # does not close them on show().
        plt.close(fig)
def replace_owner_number_with_symbol(df):
    """Shorten the thousands/millions suffixes in the ``owners`` column.

    Every ``",000,000"`` becomes ``" M"`` and every remaining ``",000"``
    becomes ``" k"``. The column is rewritten in place and the same
    DataFrame is returned.
    """
    def shorten(owner_text: str):
        # Millions must be handled first; otherwise the ",000" rule would
        # consume part of each ",000,000" group.
        return owner_text.replace(",000,000", " M").replace(",000", " k")

    df["owners"] = df["owners"].apply(shorten)
    return df
def create_heat_maps(df, plot_pairs):
    """Show one ``imshow`` figure per column pair in *plot_pairs*.

    Args:
        df: DataFrame holding the columns named in the pairs.
        plot_pairs: Iterable of ``(x, y)`` column-name pairs.
    """
    for x_name, y_name in plot_pairs:
        # NOTE(review): the image is the raw two-column value matrix (one row
        # per record), not a 2-D density of x against y, so the axis labels
        # below do not describe the image axes — confirm this is intended.
        pair_values = df[[x_name, y_name]].values
        plt.figure()  # Fresh figure for this pair
        image = plt.imshow(
            pair_values,
            cmap='hot',
            interpolation='nearest',
            aspect='auto',
        )
        plt.colorbar(image)  # Colour scale for the plotted values
        plt.xlabel(x_name)
        plt.ylabel(y_name)
        plt.title(f"Heatmap of {x_name} vs {y_name}")
        plt.show()
def price_to_dollars(convert_df):
    """Divide every value in the ``price`` column by 100.

    Each value is converted with ``int()`` first; a value of zero is kept as
    the integer 0. The column is rewritten in place and the same DataFrame
    is returned.
    """
    def to_dollars(raw_price):
        cents = int(raw_price)
        if cents == 0:
            return 0
        return cents / 100

    convert_df["price"] = convert_df["price"].apply(to_dollars)
    return convert_df
if __name__ == "__main__":
    # Download the first five pages and derive the extra/cleaned columns.
    df = get_all_data(5)
    df = add_user_rating(df)
    df = replace_owner_number_with_symbol(df)
    df = price_to_dollars(df)

    # Quick look at the columns and the distinct values of a few of them.
    print(df.columns)
    print("price", df["price"].unique())
    print("discount", df["discount"].unique())
    print("owners", df["owners"].unique())

    create_hist_plots(df)

    # Start a fresh figure explicitly so the owners histogram is never drawn
    # on top of whatever figure happens to be current.
    plt.figure()
    plt.hist(df["owners"], log=True)
    plt.xticks(rotation='vertical')
    plt.title("Histogram of game playerbase sizes with log scale")
    plt.tight_layout()
    plt.show()

    df.to_csv("game_data.csv")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment