refactor for multiple series
This commit is contained in:
parent
afb5f6242b
commit
54096321f4
3 changed files with 1381 additions and 6 deletions
103
foo.py
Normal file
103
foo.py
Normal file
|
@ -0,0 +1,103 @@
|
|||
import typing
|
||||
|
||||
import pandas
|
||||
from dataclasses import dataclass
|
||||
from math import floor
|
||||
from datetime import datetime
|
||||
|
||||
def load_score_log(path: str) -> pandas.DataFrame:
    """Read the score log CSV at *path* into a DataFrame.

    The ``score``, ``mapx`` and ``mapy`` columns are read as integers,
    ``sourcename`` and ``name`` as strings, and ``when`` is parsed as a
    timestamp written as ``day/month/year hour:minute``.
    """
    column_types = {'score': int, 'sourcename': str, 'name': str, 'mapx': int, 'mapy': int}
    return pandas.read_csv(
        path,
        sep=',',
        dtype=column_types,
        parse_dates=['when'],
        date_format='%d/%m/%Y %H:%M',
    )
|
||||
|
||||
def generate_station_stats(score_log: pandas.DataFrame) -> pandas.DataFrame:
    """Summarise the score log into one row per station.

    The summary is seeded from the ``'First Visit'`` rows (one per station is
    expected) and then extended with per-station aggregates of ``score``.

    Args:
        score_log: Frame with at least the columns ``name``, ``sourcename``,
            ``when``, ``score``, ``mapx`` and ``mapy``.

    Returns:
        A frame with the columns ``name``, ``mapx``, ``mapy``, ``first_visit``,
        ``total_score``, ``totalboostscore``, ``totalboostduration``,
        ``maxboostscore``, ``maxboostduration``, ``totalvisits``, ``captures``,
        ``maxheldduration`` and ``totalheldduration``. A station that has no
        rows of a given kind (for example it was never captured) keeps its row
        and reports 0 for the corresponding statistics.

    Raises:
        pandas.errors.MergeError: If a station has more than one
            ``'First Visit'`` row (the merges are validated as one-to-one).
    """

    def scores_for(*sources: str):
        """Group the ``score`` values of the given source types by station."""
        selected = score_log[score_log['sourcename'].isin(sources)]
        return selected.groupby('name')['score']

    def attach(summary: pandas.DataFrame, stats: pandas.DataFrame) -> pandas.DataFrame:
        """Add the name-indexed *stats* columns to *summary*, using 0 where absent."""
        # A left merge keeps every station; the default inner merge would drop
        # stations that have no rows of this kind at all.
        merged = summary.merge(stats.reset_index(), on='name', how='left', validate='1:1')
        for column in stats.columns:
            # Missing stations come back as NaN, which turns the column into
            # floats; fill them and restore the aggregate's own dtype.
            merged[column] = merged[column].fillna(0).astype(stats[column].dtype)
        return merged

    # Every station in the score log should have a first visit, so the summary
    # is based on those rows.
    is_first_visit = score_log['sourcename'] == 'First Visit'
    first_visits = score_log.loc[is_first_visit, ['name', 'mapx', 'mapy', 'when']]
    summary = first_visits.rename(columns={'when': 'first_visit'})

    # Total score over every kind of row.
    every_score = score_log.groupby('name')['score']
    summary = attach(summary, every_score.sum().to_frame('total_score'))

    # Total and maximum boosts. The duration is ten times the score; this
    # matches the 0.1 points per second used for 'Output Boost' rows when the
    # log is expanded per second (presumably the game's boost rate).
    boosts = scores_for('Output Boost')
    for prefix, boost_scores in (('total', boosts.sum()), ('max', boosts.max())):
        boost_stats = boost_scores.to_frame(f'{prefix}boostscore')
        boost_stats[f'{prefix}boostduration'] = boost_scores * 10
        summary = attach(summary, boost_stats)

    # Number of visits, counting the first visit as well.
    visits = scores_for('Visit', 'First Visit')
    summary = attach(summary, visits.count().to_frame('totalvisits'))

    # Capture statistics: how often, the longest single hold and the total hold
    # (a capture's score is used as its held duration).
    captures = scores_for('Capture')
    summary = attach(summary, captures.count().to_frame('captures'))
    summary = attach(summary, captures.max().to_frame('maxheldduration'))
    summary = attach(summary, captures.sum().to_frame('totalheldduration'))

    return summary
|
||||
|
||||
def generate_score_per_second(score_log: pandas.DataFrame) -> pandas.DataFrame:
    """Expand the score log into a time-ordered timeline with a running total.

    ``'Capture'`` rows are spread out at 1.0 point per second and
    ``'Output Boost'`` rows at 0.1 point per second, one output row per
    second, counting backwards from the row's ``when``. Every other row is
    passed through unchanged as a one-off event.

    Args:
        score_log: Frame with the columns ``name``, ``sourcename``, ``when``,
            ``score``, ``mapx`` and ``mapy``.

    Returns:
        A frame sorted by ``when`` with the columns ``name``, ``sourcename``,
        ``when``, ``score``, ``once``, ``mapx``, ``mapy`` and
        ``accumulated_score`` (the running sum of ``score`` in that order).
        Events sharing a timestamp keep the order in which they were
        generated from the log. An empty log yields an empty frame with the
        same columns.
    """
    from dataclasses import fields

    # Float division can land just below a whole number
    # (0.3 / 0.1 == 2.9999999999999996); without this slack floor() would
    # drop the last second of such a row.
    rounding_slack = 1e-9

    @dataclass
    class ScoreSecond:
        # One row of the output timeline; field order is the column order.
        name: str
        sourcename: str
        when: datetime
        score: float
        once: bool  # True for one-off events, False for per-second slices
        mapx: int
        mapy: int

    def row_to_scoreseconds(row, score_per) -> typing.Iterator[ScoreSecond]:
        """Yield one ScoreSecond of *score_per* points per second of *row*."""
        seconds = int(floor(row.score / score_per + rounding_slack))
        # Sanity check only: the score is expected to be a multiple of the rate.
        assert row.score % score_per < 0.1
        when = int(floor(row.when.timestamp()))
        for elapsed in range(seconds):
            timestamp = pandas.Timestamp(when - elapsed, unit='s')
            yield ScoreSecond(name=row.name, sourcename=row.sourcename, mapx=row.mapx, mapy=row.mapy,
                              when=timestamp, score=score_per, once=False)

    def gen_scoreseconds() -> typing.Iterator[ScoreSecond]:
        """Walk the log and emit per-second slices or one-off events."""
        for row in score_log.itertuples():
            if row.sourcename == "Capture":
                yield from row_to_scoreseconds(row, 1.0)
            elif row.sourcename == "Output Boost":
                yield from row_to_scoreseconds(row, 0.1)
            else:  # one-off
                yield ScoreSecond(name=row.name, sourcename=row.sourcename, mapx=row.mapx, mapy=row.mapy,
                                  when=row.when, score=row.score, once=True)

    records = list(gen_scoreseconds())
    if not records:
        # With no records the frame would have no columns at all, and sorting
        # by 'when' would raise KeyError.
        columns = [field.name for field in fields(ScoreSecond)]
        return pandas.DataFrame(columns=columns + ['accumulated_score'])

    scoreseconds = pandas.DataFrame(records)
    # A stable sort keeps events with equal timestamps in generation order, so
    # the running total is deterministic.
    scoreseconds = scoreseconds.sort_values(by=['when'], kind='stable').reset_index(drop=True)
    # Cast to float first so the running total is a float column even when
    # every score in the log is an integer.
    scoreseconds['accumulated_score'] = scoreseconds['score'].astype(float).cumsum()

    return scoreseconds
|
Loading…
Add table
Add a link
Reference in a new issue