Skip to content

Commit

Permalink
Merge branch 'main' into concat_str
Browse files Browse the repository at this point in the history
  • Loading branch information
dannyi96 authored Feb 16, 2025
2 parents 1c7ce4a + 6bcd303 commit f2ae048
Show file tree
Hide file tree
Showing 187 changed files with 2,012 additions and 831 deletions.
32 changes: 13 additions & 19 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ jobs:

services:
mysql:
image: mysql:8
image: mysql:9
env:
MYSQL_ALLOW_EMPTY_PASSWORD: yes
MYSQL_DATABASE: pandas
Expand All @@ -120,7 +120,7 @@ jobs:
- 3306:3306

postgres:
image: postgres:16
image: postgres:17
env:
PGUSER: postgres
POSTGRES_USER: postgres
Expand All @@ -135,7 +135,7 @@ jobs:
- 5432:5432

moto:
image: motoserver/moto:5.0.0
image: motoserver/moto:5.0.27
env:
AWS_ACCESS_KEY_ID: foobar_key
AWS_SECRET_ACCESS_KEY: foobar_secret
Expand Down Expand Up @@ -242,15 +242,14 @@ jobs:
- name: Build environment and Run Tests
# https://github.com/numpy/numpy/issues/24703#issuecomment-1722379388
run: |
/opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
/opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev
. ~/virtualenvs/pandas-dev/bin/activate
python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1
python -m pip install numpy -Csetup-args="-Dallow-noblas=true"
python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0
python -m pip install --no-cache-dir --no-build-isolation -e . -Csetup-args="--werror"
python -m pip list --no-cache-dir
export PANDAS_CI=1
python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml
PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml
concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-32bit
Expand All @@ -259,7 +258,7 @@ jobs:
Linux-Musl:
runs-on: ubuntu-22.04
container:
image: quay.io/pypa/musllinux_1_1_x86_64
image: quay.io/pypa/musllinux_1_2_x86_64
steps:
- name: Checkout pandas Repo
# actions/checkout does not work since it requires node
Expand All @@ -281,7 +280,7 @@ jobs:
apk add musl-locales
- name: Build environment
run: |
/opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
/opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev
. ~/virtualenvs/pandas-dev/bin/activate
python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0
Expand All @@ -291,8 +290,7 @@ jobs:
- name: Run Tests
run: |
. ~/virtualenvs/pandas-dev/bin/activate
export PANDAS_CI=1
python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml
PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml
concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-musl
Expand Down Expand Up @@ -357,8 +355,7 @@ jobs:
python --version
python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
python -m pip install versioneer[toml]
python -m pip install python-dateutil tzdata cython hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov
python -m pip install versioneer[toml] python-dateutil tzdata cython hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov
python -m pip install -ve . --no-build-isolation --no-index --no-deps -Csetup-args="--werror"
python -m pip list
Expand All @@ -375,7 +372,7 @@ jobs:

concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.os }}-python-freethreading-dev
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-python-freethreading-dev
cancel-in-progress: true

env:
Expand All @@ -396,14 +393,11 @@ jobs:
nogil: true

- name: Build Environment
# TODO: Once numpy 2.2.1 is out, don't install nightly version
# Tests segfault with numpy 2.2.0: https://github.com/numpy/numpy/pull/27955
run: |
python --version
python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython numpy
python -m pip install versioneer[toml]
python -m pip install python-dateutil pytz tzdata hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov
python -m pip install --upgrade pip setuptools wheel numpy meson[ninja]==1.2.1 meson-python==0.13.1
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython
python -m pip install versioneer[toml] python-dateutil pytz tzdata hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov
python -m pip install -ve . --no-build-isolation --no-index --no-deps -Csetup-args="--werror"
python -m pip list
Expand Down
15 changes: 10 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ ci:
skip: [pyright, mypy]
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.6
rev: v0.9.4
hooks:
- id: ruff
args: [--exit-non-zero-on-fix]
Expand All @@ -41,7 +41,7 @@ repos:
pass_filenames: true
require_serial: false
- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
rev: v2.4.1
hooks:
- id: codespell
types_or: [python, rst, markdown, cython, c]
Expand Down Expand Up @@ -70,7 +70,7 @@ repos:
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
- repo: https://github.com/PyCQA/isort
rev: 5.13.2
rev: 6.0.0
hooks:
- id: isort
- repo: https://github.com/asottile/pyupgrade
Expand All @@ -95,17 +95,22 @@ repos:
- id: sphinx-lint
args: ["--enable", "all", "--disable", "line-too-long"]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v19.1.6
rev: v19.1.7
hooks:
- id: clang-format
files: ^pandas/_libs/src|^pandas/_libs/include
args: [-i]
types_or: [c, c++]
- repo: https://github.com/trim21/pre-commit-mirror-meson
rev: v1.6.1
rev: v1.7.0
hooks:
- id: meson-fmt
args: ['--inplace']
- repo: https://github.com/shellcheck-py/shellcheck-py
rev: v0.10.0.1
hooks:
- id: shellcheck
args: ["--severity=warning"]
- repo: local
hooks:
- id: pyright
Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/io/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ class Render:
def setup(self, cols, rows):
self.df = DataFrame(
np.random.randn(rows, cols),
columns=[f"float_{i+1}" for i in range(cols)],
index=[f"row_{i+1}" for i in range(rows)],
columns=[f"float_{i + 1}" for i in range(cols)],
index=[f"row_{i + 1}" for i in range(rows)],
)

def time_apply_render(self, cols, rows):
Expand Down
18 changes: 9 additions & 9 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ else
fi

[[ -z "$CHECK" || "$CHECK" == "code" || "$CHECK" == "doctests" || "$CHECK" == "docstrings" || "$CHECK" == "single-docs" || "$CHECK" == "notebooks" ]] || \
{ echo "Unknown command $1. Usage: $0 [code|doctests|docstrings|single-docs|notebooks]"; exit 9999; }
{ echo "Unknown command $1. Usage: $0 [code|doctests|docstrings|single-docs|notebooks]"; exit 1; }

BASE_DIR="$(dirname $0)/.."
BASE_DIR="$(dirname "$0")/.."
RET=0

### CODE ###
if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then

MSG='Check import. No warnings, and blocklist some optional dependencies' ; echo $MSG
MSG='Check import. No warnings, and blocklist some optional dependencies' ; echo "$MSG"
python -W error -c "
import sys
import pandas
Expand All @@ -49,24 +49,24 @@ if mods:
sys.stderr.write('err: pandas should not import: {}\n'.format(', '.join(mods)))
sys.exit(len(mods))
"
RET=$(($RET + $?)) ; echo $MSG "DONE"
RET=$(($RET + $?)) ; echo "$MSG" "DONE"

fi

### DOCTESTS ###
if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then

MSG='Python and Cython Doctests' ; echo $MSG
MSG='Python and Cython Doctests' ; echo "$MSG"
python -c 'import pandas as pd; pd.test(run_doctests=True)'
RET=$(($RET + $?)) ; echo $MSG "DONE"
RET=$(($RET + $?)) ; echo "$MSG" "DONE"

fi

### DOCSTRINGS ###
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then

MSG='Validate Docstrings' ; echo $MSG
$BASE_DIR/scripts/validate_docstrings.py \
MSG='Validate Docstrings' ; echo "$MSG"
"$BASE_DIR"/scripts/validate_docstrings.py \
--format=actions \
-i ES01 `# For now it is ok if docstrings are missing the extended summary` \
-i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
Expand Down Expand Up @@ -265,7 +265,7 @@ fi
if [[ -z "$CHECK" || "$CHECK" == "notebooks" ]]; then

MSG='Notebooks' ; echo $MSG
jupyter nbconvert --execute $(find doc/source -name '*.ipynb') --to notebook
jupyter nbconvert --execute "$(find doc/source -name '*.ipynb')" --to notebook
RET=$(($RET + $?)) ; echo $MSG "DONE"

fi
Expand Down
8 changes: 3 additions & 5 deletions ci/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@
# Workaround for pytest-xdist (it collects different tests in the workers if PYTHONHASHSEED is not set)
# https://github.com/pytest-dev/pytest/issues/920
# https://github.com/pytest-dev/pytest/issues/1075
export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))')

# May help reproduce flaky CI builds if set in subsequent runs
echo PYTHONHASHSEED=$PYTHONHASHSEED
PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))')
export PYTHONHASHSEED

COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml"

Expand All @@ -16,5 +14,5 @@ if [[ "$PATTERN" ]]; then
PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\""
fi

echo $PYTEST_CMD
echo "$PYTEST_CMD"
sh -c "$PYTEST_CMD"
9 changes: 5 additions & 4 deletions ci/upload_wheels.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/bash
# Modified from numpy's https://github.com/numpy/numpy/blob/main/tools/wheels/upload_wheels.sh

set_upload_vars() {
Expand All @@ -19,20 +20,20 @@ set_upload_vars() {
fi
}
upload_wheels() {
echo ${PWD}
echo "${PWD}"
if [[ ${ANACONDA_UPLOAD} == true ]]; then
if [ -z ${TOKEN} ]; then
if [ -z "${TOKEN}" ]; then
echo no token set, not uploading
else
# sdists are located under dist folder when built through setup.py
if compgen -G "./dist/*.gz"; then
echo "Found sdist"
anaconda -q -t ${TOKEN} upload --skip -u ${ANACONDA_ORG} ./dist/*.gz
anaconda -q -t "${TOKEN}" upload --skip -u "${ANACONDA_ORG}" ./dist/*.gz
echo "Uploaded sdist"
fi
if compgen -G "./wheelhouse/*.whl"; then
echo "Found wheel"
anaconda -q -t ${TOKEN} upload --skip -u ${ANACONDA_ORG} ./wheelhouse/*.whl
anaconda -q -t "${TOKEN}" upload --skip -u "${ANACONDA_ORG}" ./wheelhouse/*.whl
echo "Uploaded wheel"
fi
echo "PyPI-style index: https://pypi.anaconda.org/$ANACONDA_ORG/simple"
Expand Down
6 changes: 2 additions & 4 deletions doc/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,7 @@ def latex(self, force=False):
for i in range(3):
self._run_os("pdflatex", "-interaction=nonstopmode", "pandas.tex")
raise SystemExit(
"You should check the file "
'"build/latex/pandas.pdf" for problems.'
'You should check the file "build/latex/pandas.pdf" for problems.'
)
self._run_os("make")
return ret_code
Expand Down Expand Up @@ -343,8 +342,7 @@ def main():
dest="verbosity",
default=0,
help=(
"increase verbosity (can be repeated), "
"passed to the sphinx build command"
"increase verbosity (can be repeated), passed to the sphinx build command"
),
)
argparser.add_argument(
Expand Down
2 changes: 1 addition & 1 deletion doc/source/development/contributing_codebase.rst
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ be located.
- tests.scalar
- tests.tseries.offsets

2. Does your test depend only on code in pd._libs?
2. Does your test depend only on code in ``pd._libs``?
This test likely belongs in one of:

- tests.libs
Expand Down
2 changes: 1 addition & 1 deletion doc/source/development/developer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ Column metadata
* Boolean: ``'bool'``
* Integers: ``'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'``
* Floats: ``'float16', 'float32', 'float64'``
* Date and Time Types: ``'datetime', 'datetimetz'``, ``'timedelta'``
* Date and Time Types: ``'datetime', 'datetimetz', 'timedelta'``
* String: ``'unicode', 'bytes'``
* Categorical: ``'categorical'``
* Other Python objects: ``'object'``
Expand Down
1 change: 1 addition & 0 deletions doc/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Attributes
Series.array
Series.values
Series.dtype
Series.info
Series.shape
Series.nbytes
Series.ndim
Expand Down
2 changes: 2 additions & 0 deletions doc/source/reference/style.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Styler properties
Styler.template_html_style
Styler.template_html_table
Styler.template_latex
Styler.template_typst
Styler.template_string
Styler.loader

Expand Down Expand Up @@ -77,6 +78,7 @@ Style export and import

Styler.to_html
Styler.to_latex
Styler.to_typst
Styler.to_excel
Styler.to_string
Styler.export
Expand Down
4 changes: 2 additions & 2 deletions doc/source/user_guide/cookbook.rst
Original file line number Diff line number Diff line change
Expand Up @@ -874,7 +874,7 @@ Timeseries
<https://stackoverflow.com/questions/13893227/vectorized-look-up-of-values-in-pandas-dataframe>`__

`Aggregation and plotting time series
<https://nipunbatra.github.io/blog/visualisation/2013/05/01/aggregation-timeseries.html>`__
<https://nipunbatra.github.io/blog/posts/2013-05-01-aggregation-timeseries.html>`__

Turn a matrix with hours in columns and days in rows into a continuous row sequence in the form of a time series.
`How to rearrange a Python pandas DataFrame?
Expand Down Expand Up @@ -1043,7 +1043,7 @@ CSV

The :ref:`CSV <io.read_csv_table>` docs

`read_csv in action <https://wesmckinney.com/blog/update-on-upcoming-pandas-v0-10-new-file-parser-other-performance-wins/>`__
`read_csv in action <https://www.datacamp.com/tutorial/pandas-read-csv>`__

`appending to a csv
<https://stackoverflow.com/questions/17134942/pandas-dataframe-output-end-of-csv>`__
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/enhancingperf.rst
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ prefer that Numba throw an error if it cannot compile a function in a way that
speeds up your code, pass Numba the argument
``nopython=True`` (e.g. ``@jit(nopython=True)``). For more on
troubleshooting Numba modes, see the `Numba troubleshooting page
<https://numba.pydata.org/numba-doc/latest/user/troubleshoot.html#the-compiled-code-is-too-slow>`__.
<https://numba.readthedocs.io/en/stable/user/troubleshoot.html>`__.

Using ``parallel=True`` (e.g. ``@jit(parallel=True)``) may result in a ``SIGABRT`` if the threading layer leads to unsafe
behavior. You can first `specify a safe threading layer <https://numba.readthedocs.io/en/stable/user/threading-layer.html#selecting-a-threading-layer-for-safe-parallel-execution>`__
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/gotchas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -372,5 +372,5 @@ constructors using something similar to the following:
s = pd.Series(newx)
See `the NumPy documentation on byte order
<https://numpy.org/doc/stable/user/basics.byteswapping.html>`__ for more
<https://numpy.org/doc/stable/user/byteswapping.html>`__ for more
details.
2 changes: 1 addition & 1 deletion doc/source/user_guide/groupby.rst
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ You can also include the grouping columns if you want to operate on them.
.. note::

The ``groupby`` operation in Pandas drops the ``name`` field of the columns Index object
The ``groupby`` operation in pandas drops the ``name`` field of the columns Index object
after the operation. This change ensures consistency in syntax between different
column selection methods within groupby operations.

Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -990,7 +990,7 @@ Thousand separators

For large numbers that have been written with a thousands separator, you can
set the ``thousands`` keyword to a string of length 1 so that integers will be parsed
correctly:
correctly.

By default, numbers with a thousands separator will be parsed as strings:

Expand Down
Loading

0 comments on commit f2ae048

Please sign in to comment.