-
Notifications
You must be signed in to change notification settings - Fork 1
Tests fail with Python 3.14 on Alpine Linux (x86_64) #40
Copy link
Copy link
Open
Description
============================= test session starts ==============================
platform linux -- Python 3.14.3, pytest-9.0.2, pluggy-1.6.0
rootdir: /home/ncopa/aports/community/py3-arm-preprocessing/src/arm-preprocessing-0.2.5
configfile: pyproject.toml
plugins: xdist-3.8.0
collected 35 items
tests/test_dataset.py .F.....FF.FF...FF....FFFFFFFF [ 82%]
tests/test_discretisation.py .... [ 94%]
tests/test_squashing.py .. [100%]
=================================== FAILURES ===================================
_________________________ test_load_data_txt_datetime __________________________
def test_load_data_txt_datetime():
# Test loading CSV without datetime columns
dataset = Dataset(
'datasets/measures2', format='txt', datetime_columns=['date', 'time']
)
> dataset.load()
tests/test_dataset.py:21:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arm_preprocessing/dataset.py:63: in load
data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
return _read(filepath_or_buffer, kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
self._engine = self._make_engine(f, self.engine)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
return mapping[engine](f, **self.options)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]
def validate_parse_dates_presence(
parse_dates: bool | list, columns: Sequence[Hashable]
) -> set:
"""
Check if parse_dates are in columns.
If user has provided names for parse_dates, check if those columns
are available.
Parameters
----------
columns : list
List of names of the dataframe.
Returns
-------
The names of the columns which will get parsed later if a list
is given as specification.
Raises
------
ValueError
If column to parse_date is not in dataframe.
"""
if not isinstance(parse_dates, list):
return set()
missing = set()
unique_cols = set()
for col in parse_dates:
if isinstance(col, str):
if col not in columns:
missing.add(col)
else:
unique_cols.add(col)
elif col in columns:
unique_cols.add(col)
else:
> unique_cols.add(columns[col])
^^^^^^^^^^^^
E TypeError: list indices must be integers or slices, not list
/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
_______________________ test_identify_dataset_timeseries _______________________
def test_identify_dataset_timeseries():
# Test identifying time-series dataset
dataset = Dataset(
'datasets/measures2', format='txt', datetime_columns=['date', 'time']
)
> dataset.load()
tests/test_dataset.py:88:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arm_preprocessing/dataset.py:63: in load
data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
return _read(filepath_or_buffer, kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
self._engine = self._make_engine(f, self.engine)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
return mapping[engine](f, **self.options)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]
def validate_parse_dates_presence(
parse_dates: bool | list, columns: Sequence[Hashable]
) -> set:
"""
Check if parse_dates are in columns.
If user has provided names for parse_dates, check if those columns
are available.
Parameters
----------
columns : list
List of names of the dataframe.
Returns
-------
The names of the columns which will get parsed later if a list
is given as specification.
Raises
------
ValueError
If column to parse_date is not in dataframe.
"""
if not isinstance(parse_dates, list):
return set()
missing = set()
unique_cols = set()
for col in parse_dates:
if isinstance(col, str):
if col not in columns:
missing.add(col)
else:
unique_cols.add(col)
elif col in columns:
unique_cols.add(col)
else:
> unique_cols.add(columns[col])
^^^^^^^^^^^^
E TypeError: list indices must be integers or slices, not list
/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
_________________________ test_identify_dataset_mixed __________________________
def test_identify_dataset_mixed():
# Test identifying mixed dataset
dataset = Dataset('datasets/artm_test_dataset', format='json')
dataset.load()
> assert dataset.information['type'] == 'mixed'
E AssertionError: assert 'numerical' == 'mixed'
E
E - mixed
E + numerical
tests/test_dataset.py:96: AssertionError
______________________ test_identify_dataset_categorical _______________________
def test_identify_dataset_categorical():
# Test identifying categorical dataset
dataset = Dataset('datasets/breast', format='csv')
dataset.load()
> assert dataset.information['type'] == 'categorical'
E AssertionError: assert 'numerical' == 'categorical'
E
E - categorical
E + numerical
tests/test_dataset.py:110: AssertionError
__________________________ test_missing_values_impute __________________________
def test_missing_values_impute():
# Test imputing missing values
dataset = Dataset('examples/missing_values/data', format='csv')
dataset.load()
> dataset.missing_values(method='impute')
tests/test_dataset.py:117:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arm_preprocessing/dataset.py:218: in missing_values
self.data[column].mean(), inplace=True)
^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/util/_decorators.py:336: in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/series.py:8113: in mean
return NDFrame.mean(
/usr/lib/python3.14/site-packages/pandas/core/generic.py:11831: in mean
return self._stat_function(
/usr/lib/python3.14/site-packages/pandas/core/generic.py:11785: in _stat_function
return self._reduce(
/usr/lib/python3.14/site-packages/pandas/core/series.py:7480: in _reduce
result = delegate._reduce(name, skipna=skipna, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <StringArray>
['John', 'Jane', nan, 'Michael', 'Sara']
Length: 5, dtype: str
name = 'mean', skipna = True, keepdims = False, axis = 0, kwargs = {}
def _reduce(
self,
name: str,
*,
skipna: bool = True,
keepdims: bool = False,
axis: AxisInt | None = 0,
**kwargs,
):
if self.dtype.na_value is np.nan and name in ["any", "all"]:
if name == "any":
return nanops.nanany(self._ndarray, skipna=skipna)
else:
return nanops.nanall(self._ndarray, skipna=skipna)
if name in ["min", "max", "argmin", "argmax", "sum"]:
result = getattr(self, name)(skipna=skipna, axis=axis, **kwargs)
if keepdims:
return self._from_sequence([result], dtype=self.dtype)
return result
> raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
E TypeError: Cannot perform reduction 'mean' with string dtype
/usr/lib/python3.14/site-packages/pandas/core/arrays/string_.py:967: TypeError
______________________ test_feature_scaling_normalisation ______________________
def test_feature_scaling_normalisation():
# Test feature scaling using normalisation
dataset = Dataset('datasets/Abalone', format='csv')
dataset.load()
> dataset.scale(method='normalisation')
tests/test_dataset.py:149:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arm_preprocessing/dataset.py:290: in scale
self.data[column] - self.data[column].min()
/usr/lib/python3.14/site-packages/pandas/core/ops/common.py:85: in new_method
return method(self, other)
^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/arraylike.py:198: in __sub__
return self._arith_method(other, operator.sub)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/series.py:6751: in _arith_method
return base.IndexOpsMixin._arith_method(self, other, op)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/base.py:1644: in _arith_method
result = ops.arithmetic_op(lvalues, rvalues, op)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/ops/array_ops.py:279: in arithmetic_op
res_values = op(left, right)
^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/ops/common.py:85: in new_method
return method(self, other)
^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/arraylike.py:198: in __sub__
return self._arith_method(other, operator.sub)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <StringArray>
['M', 'M', 'F', 'M', 'I', 'I', 'F', 'F', 'M', 'F',
...
'M', 'F', 'M', 'M', 'M', 'F', 'M', 'M', 'F', 'M']
Length: 4177, dtype: str
other = 'F', op = <built-in function sub>
def _cmp_method(self, other, op):
from pandas.arrays import (
ArrowExtensionArray,
BooleanArray,
)
if (
isinstance(other, BaseStringArray)
and self.dtype.na_value is not libmissing.NA
and other.dtype.na_value is libmissing.NA
):
# NA has priority of NaN semantics
return op(self.astype(other.dtype, copy=False), other)
if isinstance(other, ArrowExtensionArray):
if isinstance(other, BaseStringArray):
# pyarrow storage has priority over python storage
# (except if we have NA semantics and other not)
if not (
self.dtype.na_value is libmissing.NA
and other.dtype.na_value is not libmissing.NA
):
return NotImplemented
else:
return NotImplemented
if isinstance(other, StringArray):
other = other._ndarray
mask = isna(self) | isna(other)
valid = ~mask
if lib.is_list_like(other):
if len(other) != len(self):
# prevent improper broadcasting when other is 2D
raise ValueError(
f"Lengths of operands do not match: {len(self)} != {len(other)}"
)
# for array-likes, first filter out NAs before converting to numpy
if not is_array_like(other):
other = np.asarray(other)
other = other[valid]
other_dtype = getattr(other, "dtype", None)
if op.__name__.strip("_") in ["mul", "rmul"] and (
lib.is_bool(other) or lib.is_np_dtype(other_dtype, "b")
):
# GH#62595
raise TypeError(
"Cannot multiply StringArray by bools. "
"Explicitly cast to integers instead."
)
if op.__name__ in ops.ARITHMETIC_BINOPS:
result = np.empty_like(self._ndarray, dtype="object")
result[mask] = self.dtype.na_value
> result[valid] = op(self._ndarray[valid], other)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E TypeError: unsupported operand type(s) for -: 'str' and 'str'
/usr/lib/python3.14/site-packages/pandas/core/arrays/string_.py:1212: TypeError
_____________________ test_feature_scaling_standardisation _____________________
def test_feature_scaling_standardisation():
# Test feature scaling using standardisation
dataset = Dataset('datasets/Abalone', format='csv')
dataset.load()
> dataset.scale(method='standardisation')
tests/test_dataset.py:162:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arm_preprocessing/dataset.py:294: in scale
self.data[column] - self.data[column].mean()
^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/util/_decorators.py:336: in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/series.py:8113: in mean
return NDFrame.mean(
/usr/lib/python3.14/site-packages/pandas/core/generic.py:11831: in mean
return self._stat_function(
/usr/lib/python3.14/site-packages/pandas/core/generic.py:11785: in _stat_function
return self._reduce(
/usr/lib/python3.14/site-packages/pandas/core/series.py:7480: in _reduce
result = delegate._reduce(name, skipna=skipna, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <StringArray>
['M', 'M', 'F', 'M', 'I', 'I', 'F', 'F', 'M', 'F',
...
'M', 'F', 'M', 'M', 'M', 'F', 'M', 'M', 'F', 'M']
Length: 4177, dtype: str
name = 'mean', skipna = True, keepdims = False, axis = 0, kwargs = {}
def _reduce(
self,
name: str,
*,
skipna: bool = True,
keepdims: bool = False,
axis: AxisInt | None = 0,
**kwargs,
):
if self.dtype.na_value is np.nan and name in ["any", "all"]:
if name == "any":
return nanops.nanany(self._ndarray, skipna=skipna)
else:
return nanops.nanall(self._ndarray, skipna=skipna)
if name in ["min", "max", "argmin", "argmax", "sum"]:
result = getattr(self, name)(skipna=skipna, axis=axis, **kwargs)
if keepdims:
return self._from_sequence([result], dtype=self.dtype)
return result
> raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
E TypeError: Cannot perform reduction 'mean' with string dtype
/usr/lib/python3.14/site-packages/pandas/core/arrays/string_.py:967: TypeError
__________________________ test_filter_between_dates ___________________________
def test_filter_between_dates():
# Test filtering between dates
dataset = Dataset(
'datasets/measures2', format='txt', datetime_columns=['date', 'time']
)
> dataset.load()
tests/test_dataset.py:213:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arm_preprocessing/dataset.py:63: in load
data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
return _read(filepath_or_buffer, kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
self._engine = self._make_engine(f, self.engine)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
return mapping[engine](f, **self.options)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]
def validate_parse_dates_presence(
parse_dates: bool | list, columns: Sequence[Hashable]
) -> set:
"""
Check if parse_dates are in columns.
If user has provided names for parse_dates, check if those columns
are available.
Parameters
----------
columns : list
List of names of the dataframe.
Returns
-------
The names of the columns which will get parsed later if a list
is given as specification.
Raises
------
ValueError
If column to parse_date is not in dataframe.
"""
if not isinstance(parse_dates, list):
return set()
missing = set()
unique_cols = set()
for col in parse_dates:
if isinstance(col, str):
if col not in columns:
missing.add(col)
else:
unique_cols.add(col)
elif col in columns:
unique_cols.add(col)
else:
> unique_cols.add(columns[col])
^^^^^^^^^^^^
E TypeError: list indices must be integers or slices, not list
/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
____________________________ test_filter_by_minute _____________________________
def test_filter_by_minute():
# Test filtering by minute
dataset = Dataset(
'datasets/measures2', format='txt', datetime_columns=['date', 'time']
)
> dataset.load()
tests/test_dataset.py:243:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arm_preprocessing/dataset.py:63: in load
data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
return _read(filepath_or_buffer, kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
self._engine = self._make_engine(f, self.engine)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
return mapping[engine](f, **self.options)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]
def validate_parse_dates_presence(
parse_dates: bool | list, columns: Sequence[Hashable]
) -> set:
"""
Check if parse_dates are in columns.
If user has provided names for parse_dates, check if those columns
are available.
Parameters
----------
columns : list
List of names of the dataframe.
Returns
-------
The names of the columns which will get parsed later if a list
is given as specification.
Raises
------
ValueError
If column to parse_date is not in dataframe.
"""
if not isinstance(parse_dates, list):
return set()
missing = set()
unique_cols = set()
for col in parse_dates:
if isinstance(col, str):
if col not in columns:
missing.add(col)
else:
unique_cols.add(col)
elif col in columns:
unique_cols.add(col)
else:
> unique_cols.add(columns[col])
^^^^^^^^^^^^
E TypeError: list indices must be integers or slices, not list
/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
_____________________________ test_filter_by_hour ______________________________
def test_filter_by_hour():
# Test filtering by hour
dataset = Dataset(
'datasets/measures2', format='txt', datetime_columns=['date', 'time']
)
> dataset.load()
tests/test_dataset.py:266:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arm_preprocessing/dataset.py:63: in load
data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
return _read(filepath_or_buffer, kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
self._engine = self._make_engine(f, self.engine)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
return mapping[engine](f, **self.options)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]
def validate_parse_dates_presence(
parse_dates: bool | list, columns: Sequence[Hashable]
) -> set:
"""
Check if parse_dates are in columns.
If user has provided names for parse_dates, check if those columns
are available.
Parameters
----------
columns : list
List of names of the dataframe.
Returns
-------
The names of the columns which will get parsed later if a list
is given as specification.
Raises
------
ValueError
If column to parse_date is not in dataframe.
"""
if not isinstance(parse_dates, list):
return set()
missing = set()
unique_cols = set()
for col in parse_dates:
if isinstance(col, str):
if col not in columns:
missing.add(col)
else:
unique_cols.add(col)
elif col in columns:
unique_cols.add(col)
else:
> unique_cols.add(columns[col])
^^^^^^^^^^^^
E TypeError: list indices must be integers or slices, not list
/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
______________________________ test_filter_by_day ______________________________
def test_filter_by_day():
# Test filtering by day
dataset = Dataset(
'datasets/measures2', format='txt', datetime_columns=['date', 'time']
)
> dataset.load()
tests/test_dataset.py:289:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arm_preprocessing/dataset.py:63: in load
data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
return _read(filepath_or_buffer, kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
self._engine = self._make_engine(f, self.engine)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
return mapping[engine](f, **self.options)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]
def validate_parse_dates_presence(
parse_dates: bool | list, columns: Sequence[Hashable]
) -> set:
"""
Check if parse_dates are in columns.
If user has provided names for parse_dates, check if those columns
are available.
Parameters
----------
columns : list
List of names of the dataframe.
Returns
-------
The names of the columns which will get parsed later if a list
is given as specification.
Raises
------
ValueError
If column to parse_date is not in dataframe.
"""
if not isinstance(parse_dates, list):
return set()
missing = set()
unique_cols = set()
for col in parse_dates:
if isinstance(col, str):
if col not in columns:
missing.add(col)
else:
unique_cols.add(col)
elif col in columns:
unique_cols.add(col)
else:
> unique_cols.add(columns[col])
^^^^^^^^^^^^
E TypeError: list indices must be integers or slices, not list
/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
____________________________ test_filter_by_weekday ____________________________
def test_filter_by_weekday():
# Test filtering by weekday
dataset = Dataset(
'datasets/measures2', format='txt', datetime_columns=['date', 'time']
)
> dataset.load()
tests/test_dataset.py:311:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arm_preprocessing/dataset.py:63: in load
data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
return _read(filepath_or_buffer, kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
self._engine = self._make_engine(f, self.engine)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
return mapping[engine](f, **self.options)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]
def validate_parse_dates_presence(
parse_dates: bool | list, columns: Sequence[Hashable]
) -> set:
"""
Check if parse_dates are in columns.
If user has provided names for parse_dates, check if those columns
are available.
Parameters
----------
columns : list
List of names of the dataframe.
Returns
-------
The names of the columns which will get parsed later if a list
is given as specification.
Raises
------
ValueError
If column to parse_date is not in dataframe.
"""
if not isinstance(parse_dates, list):
return set()
missing = set()
unique_cols = set()
for col in parse_dates:
if isinstance(col, str):
if col not in columns:
missing.add(col)
else:
unique_cols.add(col)
elif col in columns:
unique_cols.add(col)
else:
> unique_cols.add(columns[col])
^^^^^^^^^^^^
E TypeError: list indices must be integers or slices, not list
/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
_____________________________ test_filter_by_week ______________________________
def test_filter_by_week():
# Test filtering by week
dataset = Dataset(
'datasets/measures2', format='txt', datetime_columns=['date', 'time']
)
> dataset.load()
tests/test_dataset.py:335:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arm_preprocessing/dataset.py:63: in load
data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
return _read(filepath_or_buffer, kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
self._engine = self._make_engine(f, self.engine)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
return mapping[engine](f, **self.options)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]
def validate_parse_dates_presence(
parse_dates: bool | list, columns: Sequence[Hashable]
) -> set:
"""
Check if parse_dates are in columns.
If user has provided names for parse_dates, check if those columns
are available.
Parameters
----------
columns : list
List of names of the dataframe.
Returns
-------
The names of the columns which will get parsed later if a list
is given as specification.
Raises
------
ValueError
If column to parse_date is not in dataframe.
"""
if not isinstance(parse_dates, list):
return set()
missing = set()
unique_cols = set()
for col in parse_dates:
if isinstance(col, str):
if col not in columns:
missing.add(col)
else:
unique_cols.add(col)
elif col in columns:
unique_cols.add(col)
else:
> unique_cols.add(columns[col])
^^^^^^^^^^^^
E TypeError: list indices must be integers or slices, not list
/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
_____________________________ test_filter_by_month _____________________________
def test_filter_by_month():
# Test filtering by month
dataset = Dataset(
'datasets/measures2', format='txt', datetime_columns=['date', 'time']
)
> dataset.load()
tests/test_dataset.py:358:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arm_preprocessing/dataset.py:63: in load
data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
return _read(filepath_or_buffer, kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
self._engine = self._make_engine(f, self.engine)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
return mapping[engine](f, **self.options)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]
def validate_parse_dates_presence(
parse_dates: bool | list, columns: Sequence[Hashable]
) -> set:
"""
Check if parse_dates are in columns.
If user has provided names for parse_dates, check if those columns
are available.
Parameters
----------
columns : list
List of names of the dataframe.
Returns
-------
The names of the columns which will get parsed later if a list
is given as specification.
Raises
------
ValueError
If column to parse_date is not in dataframe.
"""
if not isinstance(parse_dates, list):
return set()
missing = set()
unique_cols = set()
for col in parse_dates:
if isinstance(col, str):
if col not in columns:
missing.add(col)
else:
unique_cols.add(col)
elif col in columns:
unique_cols.add(col)
else:
> unique_cols.add(columns[col])
^^^^^^^^^^^^
E TypeError: list indices must be integers or slices, not list
/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
_____________________________ test_filter_by_year ______________________________
def test_filter_by_year():
# Test filtering by year
dataset = Dataset(
'datasets/measures2', format='txt', datetime_columns=['date', 'time']
)
> dataset.load()
tests/test_dataset.py:381:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arm_preprocessing/dataset.py:63: in load
data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
return _read(filepath_or_buffer, kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
self._engine = self._make_engine(f, self.engine)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
return mapping[engine](f, **self.options)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]
def validate_parse_dates_presence(
parse_dates: bool | list, columns: Sequence[Hashable]
) -> set:
"""
Check if parse_dates are in columns.
If user has provided names for parse_dates, check if those columns
are available.
Parameters
----------
columns : list
List of names of the dataframe.
Returns
-------
The names of the columns which will get parsed later if a list
is given as specification.
Raises
------
ValueError
If column to parse_date is not in dataframe.
"""
if not isinstance(parse_dates, list):
return set()
missing = set()
unique_cols = set()
for col in parse_dates:
if isinstance(col, str):
if col not in columns:
missing.add(col)
else:
unique_cols.add(col)
elif col in columns:
unique_cols.add(col)
else:
> unique_cols.add(columns[col])
^^^^^^^^^^^^
E TypeError: list indices must be integers or slices, not list
/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
=============================== warnings summary ===============================
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:64
/usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:64: PyparsingDeprecationWarning: 'oneOf' deprecated - use 'one_of'
prop = Group((name + Suppress("=") + comma_separated(value)) | oneOf(_CONSTANTS))
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:85
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:85
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:85
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:85
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:85
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:85
/usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:85: PyparsingDeprecationWarning: 'parseString' deprecated - use 'parse_string'
parse = parser.parseString(pattern)
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:89
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:89
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:89
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:89
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:89
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:89
/usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:89: PyparsingDeprecationWarning: 'resetCache' deprecated - use 'reset_cache'
parser.resetCache()
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_mathtext.py:45
/usr/lib/python3.14/site-packages/matplotlib/_mathtext.py:45: PyparsingDeprecationWarning: 'enablePackrat' deprecated - use 'enable_packrat'
ParserElement.enablePackrat()
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
=========================== short test summary info ============================
FAILED tests/test_dataset.py::test_load_data_txt_datetime - TypeError: list i...
FAILED tests/test_dataset.py::test_identify_dataset_timeseries - TypeError: l...
FAILED tests/test_dataset.py::test_identify_dataset_mixed - AssertionError: a...
FAILED tests/test_dataset.py::test_identify_dataset_categorical - AssertionEr...
FAILED tests/test_dataset.py::test_missing_values_impute - TypeError: Cannot ...
FAILED tests/test_dataset.py::test_feature_scaling_normalisation - TypeError:...
FAILED tests/test_dataset.py::test_feature_scaling_standardisation - TypeErro...
FAILED tests/test_dataset.py::test_filter_between_dates - TypeError: list ind...
FAILED tests/test_dataset.py::test_filter_by_minute - TypeError: list indices...
FAILED tests/test_dataset.py::test_filter_by_hour - TypeError: list indices m...
FAILED tests/test_dataset.py::test_filter_by_day - TypeError: list indices mu...
FAILED tests/test_dataset.py::test_filter_by_weekday - TypeError: list indice...
FAILED tests/test_dataset.py::test_filter_by_week - TypeError: list indices m...
FAILED tests/test_dataset.py::test_filter_by_month - TypeError: list indices ...
FAILED tests/test_dataset.py::test_filter_by_year - TypeError: list indices m...
================== 15 failed, 20 passed, 14 warnings in 1.78s ==================
>>> ERROR: py3-arm-preprocessing: check failed
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels