Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from typing import Sequence, Optional, Any
import pandas as pd
import plotly.express as px
# Three functions by Max, to get the mean value of game owner and split the company names.
DEFAULT_ILLEGAL_CONTINUATIONS = {"INC.", "LLC", "CO.", "LTD.", "S.R.O."}
def get_owner_means(owner_limits: Sequence[Any]):
if not isinstance(owner_limits, list):
return owner_limits
else:
return (owner_limits[0] + owner_limits[1]) / 2
def convert_owners_to_limits(owner_limit):
if not isinstance(owner_limit, str):
return owner_limit
owners_raw = [rev.replace(" ", "") for rev in owner_limit.split(" .. ")]
owners_clean = []
for owner_limit in owners_raw:
owner_limit = owner_limit.replace("M", "0" * 6)
owner_limit = owner_limit.replace("k", "0" * 3)
owners_clean.append(int(owner_limit))
return owners_clean
def split_companies(arr, illegal_continuations: Optional[Sequence[str]] = None):
"""
Splits the given string at comma sign as long as following the comma none of the illegal
continuations happen. In such a case, the string split does not happen that said comma.
:param arr: Array containing the developers/publishers for a single game
:param illegal_continuations: A list of illegal continuations. Must be uppercase.
:return: Returns the given split input string as a list.
:note: If the arr is numpy.NaN, this value is returned instead of a list.
"""
if illegal_continuations is None:
illegal_continuations = DEFAULT_ILLEGAL_CONTINUATIONS
if pd.isna(arr):
return arr
results_list = []
start_index = 0
split_char = ", "
for index in range(len(arr)):
if index < len(arr) - 1:
txt = arr[index:index + 2]
if txt == split_char:
found_illegal = False
min_continuation = min([len(continuation) for continuation in illegal_continuations])
max_continuation = max([len(continuation) for continuation in illegal_continuations])
next_chars = arr[index + min_continuation:index + min_continuation + max_continuation]
for i in range(index + min_continuation, index + len(next_chars) + 2):
comp_txt = arr[index + 2:i + 2].upper()
if comp_txt in illegal_continuations:
found_illegal = True
break
if not found_illegal:
results_list.append(arr[start_index:index].strip())
start_index = index + 1
elif index == len(arr) - 1:
results_list.append(arr[start_index:index + 1].strip())
return results_list
# The function to plot the bubble chart of market performance
def plot_market_performance(df, company_type: str, game_number_max: int, game_number_min: int):
"""
This function creates a bubble chart of market performance.
The variable company_type could only be developer or publisher.
The bubble size is game number, which is filtered by the range [game_number_min, game_number_max].
"""
df["owner_means"] = df["owners"].apply(lambda x: get_owner_means(convert_owners_to_limits(x)))
# Calulate the revenue
df["revenue"] = df["owner_means"]*df["price"]
# Split the companies
df["company_split"] = df[company_type].apply(lambda x: split_companies(x))
df_split = df.assign(developer=df['company_split'].str.split(', ')).explode('company_split')
# Calculate revenue and game number
company_stats = df_split.groupby('company_split').agg(revenue=('revenue', 'sum'), game_number=('company_split', 'count'), owner_number=('owner_means', 'sum')).reset_index()
# Filter the Game number
filter_company_stats = company_stats[(company_stats['game_number'] >= game_number_min) & (company_stats['game_number'] <= game_number_max)]
# Bubble chart
fig = px.scatter(filter_company_stats, x='revenue', y='owner_number', size='game_number', log_y=True, log_x=True,size_max = 40,
hover_name='company_split', title=f'Market Performance of {company_type}')
return fig
if __name__ == "__main__":
# Read data and calulate owner means
df = pd.read_csv("full_game_data.csv")
game_number_min = 1
game_number_max = 10
fig = plot_market_performance(df, 'developer', game_number_max, game_number_min)
fig.show()