Skip to content
Snippets Groups Projects
Market_performance_function.py 4.33 KiB
Newer Older
from typing import Sequence, Optional, Any
import pandas as pd
import plotly.express as px


# Three functions by Max, to get the mean value of game owner and split the company names.

DEFAULT_ILLEGAL_CONTINUATIONS = {"INC.", "LLC", "CO.", "LTD.", "S.R.O."}


def get_owner_means(owner_limits: Sequence[Any]):
    if not isinstance(owner_limits, list):
        return owner_limits
    else:
        return (owner_limits[0] + owner_limits[1]) / 2


def convert_owners_to_limits(owner_limit):
    if not isinstance(owner_limit, str):
        return owner_limit
    owners_raw = [rev.replace(" ", "") for rev in owner_limit.split(" .. ")]
    owners_clean = []
    for owner_limit in owners_raw:
        owner_limit = owner_limit.replace("M", "0" * 6)
        owner_limit = owner_limit.replace("k", "0" * 3)
        owners_clean.append(int(owner_limit))
    return owners_clean


def split_companies(arr, illegal_continuations: Optional[Sequence[str]] = None):
    """
    Splits the given string at comma sign as long as following the comma none of the illegal
    continuations happen. In such a case, the string split does not happen that said comma.
    :param arr: Array containing the developers/publishers for a single game
    :param illegal_continuations: A list of illegal continuations. Must be uppercase.
    :return: Returns the given split input string as a list.
    :note: If the arr is numpy.NaN, this value is returned instead of a list.
    """
    if illegal_continuations is None:
        illegal_continuations = DEFAULT_ILLEGAL_CONTINUATIONS
    if pd.isna(arr):
        return arr

    results_list = []
    start_index = 0
    split_char = ", "

    for index in range(len(arr)):
        if index < len(arr) - 1:
            txt = arr[index:index + 2]
            if txt == split_char:
                found_illegal = False
                min_continuation = min([len(continuation) for continuation in illegal_continuations])
                max_continuation = max([len(continuation) for continuation in illegal_continuations])
                next_chars = arr[index + min_continuation:index + min_continuation + max_continuation]
                for i in range(index + min_continuation, index + len(next_chars) + 2):
                    comp_txt = arr[index + 2:i + 2].upper()
                    if comp_txt in illegal_continuations:
                        found_illegal = True
                        break
                if not found_illegal:
                    results_list.append(arr[start_index:index].strip())
                    start_index = index + 1
        elif index == len(arr) - 1:
            results_list.append(arr[start_index:index + 1].strip())

    return results_list


# The function to plot the bubble chart of market performance

def plot_market_performance(df, company_type: str, game_number_max: int, game_number_min: int):
    
    """
    This function creates a bubble chart of market performance.
    The variable company_type could only be developer or publisher.
    The bubble size is game number, which is filtered by the range [game_number_min, game_number_max].
    """

    df["owner_means"] = df["owners"].apply(lambda x: get_owner_means(convert_owners_to_limits(x)))

    # Calulate the revenue
    df["revenue"] = df["owner_means"]*df["price"]

    # Split the companies
    df["company_split"] = df[company_type].apply(lambda x: split_companies(x))
    df_split = df.assign(developer=df['company_split'].str.split(', ')).explode('company_split')

    # Calculate revenue and game number
    company_stats = df_split.groupby('company_split').agg(revenue=('revenue', 'sum'), game_number=('company_split', 'count'), owner_number=('owner_means', 'sum')).reset_index()

    # Filter the Game number
    filter_company_stats = company_stats[(company_stats['game_number'] >= game_number_min) & (company_stats['game_number'] <= game_number_max)]

    # Bubble chart
    fig = px.scatter(filter_company_stats, x='revenue', y='owner_number', size='game_number', log_y=True, log_x=True,size_max = 40,
                 hover_name='company_split', title=f'Market Performance of {company_type}')

    return fig



if __name__ == "__main__":
    # Read data and calulate owner means
    df = pd.read_csv("full_game_data.csv")
    game_number_min = 1
    game_number_max = 10
    fig = plot_market_performance(df, 'developer', game_number_max, game_number_min)
    fig.show()