Source code for pyvolleydata.helpers

import pandas as pd
from datetime import datetime


[docs] def get_data(league, seasons, data_type): """ Loads data for a specified league and season(s) from the volleydata repository. Parameters ---------- league : str A string specifying which of 'mlv', 'pvf', or 'au' to load data for. data_type : str The type of data to fetch (e.g., 'pbp', 'events_log') seasons : int, list of int, or None, optional Season(s) to load. By default, None loads all available seasons. - int : Single season year (e.g., 2025) - list of int : Multiple seasons (e.g., [2024, 2025]) - None : Load all available seasons Returns ------- pd.DataFrame A DataFrame containing the merged or filtered data with an additional 'league' column. Examples -------- >>> fetch_data(league='mlv', data_type='pbp', seasons=2024) >>> fetch_data(league='au', data_type='rosters', seasons=[2022, 2023]) >>> fetch_data(league='lovb', data_type='events_log') """ league_config = { 'mlv': {'start_year': 2024, 'internal_name': 'pvf'}, 'lovb': {'start_year': 2025, 'internal_name': 'lovb'}, 'au': {'start_year': 2022, 'internal_name': 'aupvb'} } if league in league_config: league_start_year = league_config.get(league).get('start_year') league_internal_name = league_config.get(league).get('internal_name') if isinstance(seasons, int): seasons = [seasons] if isinstance(seasons, list): validate_seasons(seasons, league_start_year) elif seasons is None: seasons = list(range(league_start_year, datetime.now().year + 1)) else: raise TypeError(f'Expected seasons to be an int, list of ints, or None, got {type(seasons).__name__}') base_url = "https://github.com/awosoga/volleydata/releases/download" df = pd.DataFrame() if data_type in {'pbp', 'events_log'}: for season in seasons: file_path = f"{league_internal_name}-{data_type.replace("_", "-")}/{league_internal_name}_{data_type}_{season}.csv" df = pd.concat([df, pd.read_csv(f"{base_url}/{file_path}")]) else: file_path = f"{league_internal_name}-{data_type.replace("_", "-")}/{league_internal_name}_{data_type}.csv" df = pd.read_csv(f"{base_url}/{file_path}") df = df[df['season'].isin(seasons)] else: raise TypeError("league must be one of 'mlv', 'lovb', or 'au'") df['league'] = league return df
[docs] def validate_seasons(seasons, league_start_year): """ Checks whether all the provided seasons are valid years and raises an error if not. Parameters ---------- seasons: list of int A list of years representing seasons to validate. league_start_year: int The starting year to determine if the seasons are within range. Returns ------- None Examples -------- >>> validate_seasons([2024, 2025], 2024) >>> validate_seasons(['2020'], 2024) # Raises TypeError >>> validate_seasons([2022, 2023, 2030], 2024) # Raises ValueError """ if not isinstance(seasons, list): raise TypeError('Expected a list of years') for year in seasons: if not isinstance(year, int): raise TypeError(f'Expected an integer for year, got {type(year).__name__}') if year < league_start_year or year > datetime.now().year: raise ValueError(f'Year {year} out of valid range for this league ({league_start_year}-{datetime.now().year})')