Usage Guide: Analyzing Volleyball Data

Overview

This guide details how to use pyvolleydata to access clean, standardized data from Major League Volleyball (MLV), League One Volleyball (LOVB), and Athletes Unlimited Pro Volleyball (AUPVB).

Installation

You can install the pyvolleydata package with:

$ pip install pyvolleydata

Getting Started

To use pyvolleydata in a project, start by importing the package:

import pyvolleydata
print(pyvolleydata.__version__)
1.0.0

Accessing League Data

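All loading functions in pyvolleydata take the same two arguments:

  • league: One of 'mlv', 'lovb', or 'au'. Required.

  • seasons: A season year (e.g., 2025) or a list of years (e.g., [2024, 2025]). Optional; if omitted, data for all seasons is loaded.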

Schedule

load_schedule(league, seasons)

from pyvolleydata.get_data import load_schedule

# Get the lovb schedule for 2025
schedule = load_schedule('lovb', 2025)
print(schedule.head())
   season        date home_team  away_team  ...  result  match_id   phase  league
0    2025  2025-01-08   Atlanta  Salt Lake  ...     1:3   2161068  Week 1    lovb
1    2025  2025-01-09   Houston     Austin  ...     3:2   2161288  Week 1    lovb
2    2025  2025-01-10    Austin    Madison  ...     3:0   2161289  Week 1    lovb
3    2025  2025-01-10   Houston   Nebraska  ...     0:3   2161290  Week 1    lovb
4    2025  2025-01-15    Austin    Atlanta  ...     1:3   2161291  Week 2    lovb

[5 rows x 10 columns]

Officials

load_officials(league, seasons)

from pyvolleydata.get_data import load_officials

# Get the mlv official data for all seasons
officials = load_officials('mlv')
print(officials.head())
   match_id  season            match_datetime  ... last_name level league
0   2125268    2024  2024-01-26T00:00:00.000Z  ...    Prater   USA    mlv
1   2125268    2024  2024-01-26T00:00:00.000Z  ...   McLarty   USA    mlv
2   2125268    2024  2024-01-26T00:00:00.000Z  ...      Chen   USA    mlv
3   2125268    2024  2024-01-26T00:00:00.000Z  ...  TerMolen   USA    mlv
4   2125270    2024  2024-02-02T00:00:00.000Z  ...    Prater    AA    mlv

[5 rows x 9 columns]

Player Information

load_player_info(league, seasons)

from pyvolleydata.get_data import load_player_info

# Get the au player-info data for 2025
player_info = load_player_info('au', 2025)
print(player_info.head())
      match_id  season  ...     team_color  league
2279   2249759    2025  ...  cadmiumOrange      au
2280   2249759    2025  ...  cadmiumOrange      au
2281   2249759    2025  ...  cadmiumOrange      au
2282   2249759    2025  ...  cadmiumOrange      au
2283   2249759    2025  ...  cadmiumOrange      au

[5 rows x 29 columns]

Team Staff

load_team_staff(league, seasons)

from pyvolleydata.get_data import load_team_staff

# Get the mlv team-staff data for 2024 and 2025
team_staff = load_team_staff('mlv', [2024, 2025])
print(team_staff.head())
   match_id  season            match_datetime  ... first_name last_name league
0   2125268    2024  2024-01-26T00:00:00.000Z  ...      Cathy    George    mlv
1   2125268    2024  2024-01-26T00:00:00.000Z  ...       Bill    Walton    mlv
2   2125268    2024  2024-01-26T00:00:00.000Z  ...      Denis  Dimitrov    mlv
3   2125268    2024  2024-01-26T00:00:00.000Z  ...      Angel     Perez    mlv
4   2125268    2024  2024-01-26T00:00:00.000Z  ...     Carlos   Cardona    mlv

[5 rows x 9 columns]

Play-by-Play

load_pbp(league, seasons)

from pyvolleydata.get_data import load_pbp

# Get the lovb pbp data for all seasons
pbp = load_pbp('lovb')
print(pbp.head())
   match_id  season            match_datetime  ... home_score away_score league
0   2161068    2025  2025-01-09T00:30:00.000Z  ...          1          0   lovb
1   2161068    2025  2025-01-09T00:30:00.000Z  ...          1          0   lovb
2   2161068    2025  2025-01-09T00:30:00.000Z  ...          1          0   lovb
3   2161068    2025  2025-01-09T00:30:00.000Z  ...          1          0   lovb
4   2161068    2025  2025-01-09T00:30:00.000Z  ...          1          0   lovb

[5 rows x 15 columns]

Events Log

load_events_log(league, seasons)

from pyvolleydata.get_data import load_events_log

# Get the au events log data for 2025
events_log = load_events_log('au', 2025)
print(events_log.head())
   match_id  season  ... verified_method  league
0   2249759    2025  ...             NaN      au
1   2249759    2025  ...             NaN      au
2   2249759    2025  ...             NaN      au
3   2249759    2025  ...             NaN      au
4   2249759    2025  ...             NaN      au

[5 rows x 45 columns]

Player Boxscore

load_player_boxscore(league, seasons)

from pyvolleydata.get_data import load_player_boxscore

# Get the mlv player-boxscore data for 2025
player_boxscore = load_player_boxscore('mlv', 2025)
print(player_boxscore.head())
      match_id  season            match_datetime  ... spike_hp points league
6881   2160916    2025  2025-01-10T00:00:00.000Z  ...      1.0    0.0    mlv
6882   2160916    2025  2025-01-10T00:00:00.000Z  ...      0.0    0.0    mlv
6883   2160916    2025  2025-01-10T00:00:00.000Z  ...      0.0    2.0    mlv
6884   2160916    2025  2025-01-10T00:00:00.000Z  ...      NaN    NaN    mlv
6885   2160916    2025  2025-01-10T00:00:00.000Z  ...      NaN    NaN    mlv

[5 rows x 37 columns]

Team Boxscore

load_team_boxscore(league, seasons)

from pyvolleydata.get_data import load_team_boxscore

# Get the lovb team-boxscore data for 2025
team_boxscore = load_team_boxscore('lovb', 2025)
print(team_boxscore.head())
   match_id  season            match_datetime  ... spike_hp points  league
0   2161068    2025  2025-01-09T00:30:00.000Z  ...        1      6    lovb
1   2161068    2025  2025-01-09T00:30:00.000Z  ...        1      6    lovb
2   2161068    2025  2025-01-09T00:30:00.000Z  ...        3     10    lovb
3   2161068    2025  2025-01-09T00:30:00.000Z  ...        2      4    lovb
4   2161068    2025  2025-01-09T00:30:00.000Z  ...        0      6    lovb

[5 rows x 29 columns]