Skip to content
Snippets Groups Projects
main.py 6.34 KiB
Newer Older
  • Learn to ignore specific revisions
  • Max Väistö's avatar
    Max Väistö committed
    import itertools
    
    import datetime
    from concurrent.futures import ThreadPoolExecutor
    
    Max Väistö's avatar
    Max Väistö committed
    
    import requests
    import json
    import pandas
    import time
    import matplotlib.pyplot as plt
    from pandas.core.dtypes.common import is_numeric_dtype
    
    
    STEAMSPY_ALL_GAMES_URL = "https://steamspy.com/api.php?request=all&page="
    STEAM_GAME_INFO_URL = "https://store.steampowered.com/api/appdetails?appids="
    STEAM_API_LANGUAGE = "&l=english"
    STEAM_SPY_GAME_INFO = "https://steamspy.com/api.php?request=appdetails&appid="
    
    Max Väistö's avatar
    Max Väistö committed
    
    
    # There are 67 pages of data, but for the heck of it
    # we're going to try to load 100 pages.
    
    def get_all_data(iterations: int = 100, num_threads: int = 4):
        """Download the SteamSpy game listing and enrich it with per-game details.

        Pages ``0 .. iterations - 1`` of the SteamSpy "all" endpoint are requested
        in order.  Paging stops at the first page that cannot be fetched or
        parsed, or that contains no games.  For every ``appid`` collected, the
        Steam store API and the SteamSpy details API are then queried on a
        thread pool, and their results are appended as extra columns.

        Args:
            iterations: Maximum number of listing pages to request.
            num_threads: Number of worker threads for the per-game requests.

        Returns:
            A ``pandas.DataFrame`` with one row per game.  An empty frame is
            returned when no listing page could be loaded.
        """
        def fetch_details(app_id):
            # Both helpers expect the app id as a string.
            app_id = str(app_id)
            return (
                get_additional_game_data_steam(app_id),
                get_additional_game_data_steamspy(app_id),
            )

        pages = []
        for page in range(iterations):
            print(page)
            url = STEAMSPY_ALL_GAMES_URL + str(page)
            try:
                response = json.loads(requests.get(url).text)
            except (requests.RequestException, ValueError) as error:
                # Network failure or a body that is not JSON: stop paging and
                # keep whatever has been collected so far.
                print(error)
                break
            if not isinstance(response, dict) or not response:
                # Nothing to add from this page; treat it as the end of the list.
                break
            pages.append(pandas.DataFrame(list(response.values())))

        if not pages:
            return pandas.DataFrame()

        # Concatenate once instead of growing the frame page by page.
        df = pandas.concat(pages, ignore_index=True, sort=False)

        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            combined_results = list(executor.map(fetch_details, df["appid"]))

        steam_results = [steam for steam, _ in combined_results]
        steamspy_results = [steamspy for _, steamspy in combined_results]

        # executor.map preserves input order, so the rows line up by position.
        return pandas.concat(
            [df, pandas.DataFrame(steam_results), pandas.DataFrame(steamspy_results)],
            axis=1,
        )
    
    
    
    def get_additional_game_data_steam(id, *, max_fails=10, base_wait=60):
        """Fetch platforms, release date and categories for one app from the Steam store API.

        The request is repeated, sleeping ``base_wait`` seconds between attempts,
        until the API returns a non-empty payload whose entry for ``id`` reports
        success.  Only replies that explicitly report ``success`` as false count
        towards ``max_fails``; empty replies are retried without limit
        (presumably these are rate-limit responses -- TODO confirm).

        Args:
            id: The app id as a string; it is appended to the URL and used as the
                key into the response.
            max_fails: Number of unsuccessful replies after which the lookup is
                abandoned.
            base_wait: Seconds to sleep before each retry.

        Returns:
            A ``pandas.Series`` with the entries ``platforms`` (list of enabled
            platform names), ``release_date`` (string) and ``categories`` (list
            of category descriptions).  When the lookup is abandoned the lists
            are empty and the date is ``""``.
        """
        url = STEAM_GAME_INFO_URL + id + STEAM_API_LANGUAGE

        def request_details():
            return json.loads(requests.get(url).text)

        response = request_details()
        print(datetime.datetime.now())
        fails = 0
        while not response or not response[id]["success"]:
            # A non-empty reply here means the API answered but reported failure
            # for this app; give up after max_fails of those.
            if response:
                fails += 1
                if fails >= max_fails:
                    return pandas.Series({'platforms': [], 'release_date': "", 'categories': []})
            print("Failed queries for", id, "is", fails)
            time.sleep(base_wait)
            response = request_details()

        data = response[id]["data"]
        if data["type"] != "game":
            print("Non game found", data["name"])
        platforms = [name for name, enabled in data["platforms"].items() if enabled]
        release_date = data["release_date"]["date"]
        # "categories" is optional in the payload.
        categories = [entry["description"] for entry in data.get("categories", [])]
        return pandas.Series({'platforms': platforms, 'release_date': release_date, 'categories': categories})
    
    
    def get_additional_game_data_steamspy(id):
        """Fetch concurrent users, languages, genres and tags for one app from SteamSpy.

        Args:
            id: The app id as a string; it is appended to the request URL.

        Returns:
            A dict with the keys ``ccu``, ``languages`` (list of strings),
            ``genres`` (the raw ``genre`` value of the response) and ``tags``
            (list of tag names).
        """
        url = STEAM_SPY_GAME_INFO + id
        response = json.loads(requests.get(url).text)

        # A null "languages" value would otherwise raise AttributeError and
        # abort the whole thread-pool run for a single incomplete record.
        raw_languages = response["languages"]
        languages = raw_languages.split(", ") if raw_languages is not None else []

        # Only the tag names (the keys) are kept; a falsy value means no tags.
        raw_tags = response["tags"]
        tags = list(raw_tags) if raw_tags else []

        return dict(ccu=response["ccu"], languages=languages, genres=response["genre"], tags=tags)
    
    
    
    Max Väistö's avatar
    Max Väistö committed
    def add_user_rating(df):
        """Add a ``Review_rating`` column holding the share of positive reviews.

        The rating is ``positive / (positive + negative)``; rows where both
        counts are zero get a rating of 0.  The calculation is done on whole
        columns, so it also works on an empty frame.

        Args:
            df: Frame with numeric ``positive`` and ``negative`` columns.  It is
                modified in place.

        Returns:
            The same ``df`` object, with the ``Review_rating`` column added.
        """
        positive = df["positive"]
        negative = df["negative"]
        no_reviews = (positive == 0) & (negative == 0)
        # Use a denominator of 1 for games without reviews so they evaluate to
        # 0 / 1 == 0 instead of 0 / 0.
        total = (positive + negative).mask(no_reviews, 1)
        df["Review_rating"] = positive / total
        return df
    
    
    def create_hist_plots(df, output_dir="images"):
        """Save and show a log-scaled histogram for every numeric column of ``df``.

        Each figure is titled ``"<column> log histogram"`` and written to
        ``<output_dir>/<title>.png`` before it is shown.

        Args:
            df: Frame whose numeric columns are plotted.
            output_dir: Directory for the PNG files; created if it is missing.
        """
        # Local import keeps this block self-contained.
        from pathlib import Path

        target_dir = Path(output_dir)
        for col_name in df.columns:
            if not is_numeric_dtype(df[col_name]):
                continue

            title = f"{col_name} log histogram"
            fig = plt.figure()
            plt.hist(df[col_name], log=True)
            plt.title(title)

            # Build the path with pathlib instead of a hard-coded backslash so
            # the file lands inside the directory on every platform.
            target_dir.mkdir(parents=True, exist_ok=True)
            fig.savefig(target_dir / f"{title}.png")

            plt.show()
            # Release the figure so a wide table does not accumulate open figures.
            plt.close(fig)
    
    
    def replace_owner_number_with_symbol(df):
        """Shorten the ``owners`` strings by abbreviating thousands and millions.

        ``",000,000"`` becomes ``" M"`` and any remaining ``",000"`` becomes
        ``" k"``.  ``df`` is modified in place and also returned.
        """
        def abbreviate(owner_range: str):
            # Millions must be handled first; otherwise ",000" would match inside them.
            millions_done = owner_range.replace(",000,000", " M")
            return millions_done.replace(",000", " k")

        shortened = df["owners"].apply(abbreviate)
        df["owners"] = shortened
        return df
    
    
    def create_heat_maps(df, plot_pairs):
        """Show one image plot per column pair in ``plot_pairs``.

        For each ``(x, y)`` pair, the values of the two columns (a rows-by-2
        array) are drawn with ``imshow`` using the ``hot`` colour map, together
        with a colour bar, axis labels and a title.
        """
        for pair in plot_pairs:
            first_col, second_col = pair
            # A fresh figure per pair keeps the plots separate.
            plt.figure()
            cell_values = df[[first_col, second_col]].values
            image = plt.imshow(cell_values, cmap='hot', interpolation='nearest', aspect='auto')
            plt.colorbar(image)
            plt.xlabel(first_col)
            plt.ylabel(second_col)
            plt.title(f"Heatmap of {first_col} vs {second_col}")
            plt.show()
    
    
    def price_to_dollars(convert_df):
        """Divide the ``price`` column by 100, leaving zero prices as 0.

        Every value is passed through ``int`` first.  ``convert_df`` is modified
        in place and also returned.
        """
        def to_dollars(raw_price):
            cents = int(raw_price)
            if cents == 0:
                return 0
            return cents / 100

        convert_df["price"] = convert_df["price"].apply(to_dollars)
        return convert_df
    
    
    if __name__ == "__main__":
        # Download the game table and attach the per-game API details.
        df = get_all_data()

        # Derived and cleaned-up columns.
        df = add_user_rating(df)
        df = replace_owner_number_with_symbol(df)
        df = price_to_dollars(df)

        # Save the data before plotting starts.
        df.to_csv("game_data_experimental.csv")

        # Quick look at the columns and the distinct values of a few of them.
        print(df.columns)
        for column in ("price", "discount", "owners"):
            print(column, df[column].unique())

        create_hist_plots(df)

        # "owners" holds range labels rather than numbers, so it is plotted
        # separately from the numeric columns.
        plt.hist(df["owners"], log=True)
        plt.xticks(rotation='vertical')
        plt.title("Histogram of game playerbase sizes with log scale")
        plt.tight_layout()
        plt.show()

        df.to_csv("game_data_full.csv")