Skip to content

Tests fail with Python 3.14 on Alpine Linux (x86_64) #40

@ncopa

Description

@ncopa
============================= test session starts ==============================
platform linux -- Python 3.14.3, pytest-9.0.2, pluggy-1.6.0
rootdir: /home/ncopa/aports/community/py3-arm-preprocessing/src/arm-preprocessing-0.2.5
configfile: pyproject.toml
plugins: xdist-3.8.0
collected 35 items

tests/test_dataset.py .F.....FF.FF...FF....FFFFFFFF                      [ 82%]
tests/test_discretisation.py ....                                        [ 94%]
tests/test_squashing.py ..                                               [100%]

=================================== FAILURES ===================================
_________________________ test_load_data_txt_datetime __________________________

    def test_load_data_txt_datetime():
        # Test loading CSV without datetime columns
        dataset = Dataset(
            'datasets/measures2', format='txt', datetime_columns=['date', 'time']
        )
>       dataset.load()

tests/test_dataset.py:21: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
arm_preprocessing/dataset.py:63: in load
    data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
    return _read(filepath_or_buffer, kwds)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
    self._engine = self._make_engine(f, self.engine)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
    return mapping[engine](f, **self.options)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
    validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]

    def validate_parse_dates_presence(
        parse_dates: bool | list, columns: Sequence[Hashable]
    ) -> set:
        """
        Check if parse_dates are in columns.
    
        If user has provided names for parse_dates, check if those columns
        are available.
    
        Parameters
        ----------
        columns : list
            List of names of the dataframe.
    
        Returns
        -------
        The names of the columns which will get parsed later if a list
        is given as specification.
    
        Raises
        ------
        ValueError
            If column to parse_date is not in dataframe.
    
        """
        if not isinstance(parse_dates, list):
            return set()
    
        missing = set()
        unique_cols = set()
        for col in parse_dates:
            if isinstance(col, str):
                if col not in columns:
                    missing.add(col)
                else:
                    unique_cols.add(col)
            elif col in columns:
                unique_cols.add(col)
            else:
>               unique_cols.add(columns[col])
                                ^^^^^^^^^^^^
E               TypeError: list indices must be integers or slices, not list

/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
_______________________ test_identify_dataset_timeseries _______________________

    def test_identify_dataset_timeseries():
        # Test identifying time-series dataset
        dataset = Dataset(
            'datasets/measures2', format='txt', datetime_columns=['date', 'time']
        )
>       dataset.load()

tests/test_dataset.py:88: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
arm_preprocessing/dataset.py:63: in load
    data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
    return _read(filepath_or_buffer, kwds)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
    self._engine = self._make_engine(f, self.engine)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
    return mapping[engine](f, **self.options)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
    validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]

    def validate_parse_dates_presence(
        parse_dates: bool | list, columns: Sequence[Hashable]
    ) -> set:
        """
        Check if parse_dates are in columns.
    
        If user has provided names for parse_dates, check if those columns
        are available.
    
        Parameters
        ----------
        columns : list
            List of names of the dataframe.
    
        Returns
        -------
        The names of the columns which will get parsed later if a list
        is given as specification.
    
        Raises
        ------
        ValueError
            If column to parse_date is not in dataframe.
    
        """
        if not isinstance(parse_dates, list):
            return set()
    
        missing = set()
        unique_cols = set()
        for col in parse_dates:
            if isinstance(col, str):
                if col not in columns:
                    missing.add(col)
                else:
                    unique_cols.add(col)
            elif col in columns:
                unique_cols.add(col)
            else:
>               unique_cols.add(columns[col])
                                ^^^^^^^^^^^^
E               TypeError: list indices must be integers or slices, not list

/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
_________________________ test_identify_dataset_mixed __________________________

    def test_identify_dataset_mixed():
        # Test identifying mixed dataset
        dataset = Dataset('datasets/artm_test_dataset', format='json')
        dataset.load()
>       assert dataset.information['type'] == 'mixed'
E       AssertionError: assert 'numerical' == 'mixed'
E         
E         - mixed
E         + numerical

tests/test_dataset.py:96: AssertionError
______________________ test_identify_dataset_categorical _______________________

    def test_identify_dataset_categorical():
        # Test identifying categorical dataset
        dataset = Dataset('datasets/breast', format='csv')
        dataset.load()
>       assert dataset.information['type'] == 'categorical'
E       AssertionError: assert 'numerical' == 'categorical'
E         
E         - categorical
E         + numerical

tests/test_dataset.py:110: AssertionError
__________________________ test_missing_values_impute __________________________

    def test_missing_values_impute():
        # Test imputing missing values
        dataset = Dataset('examples/missing_values/data', format='csv')
        dataset.load()
>       dataset.missing_values(method='impute')

tests/test_dataset.py:117: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
arm_preprocessing/dataset.py:218: in missing_values
    self.data[column].mean(), inplace=True)
    ^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/util/_decorators.py:336: in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/series.py:8113: in mean
    return NDFrame.mean(
/usr/lib/python3.14/site-packages/pandas/core/generic.py:11831: in mean
    return self._stat_function(
/usr/lib/python3.14/site-packages/pandas/core/generic.py:11785: in _stat_function
    return self._reduce(
/usr/lib/python3.14/site-packages/pandas/core/series.py:7480: in _reduce
    result = delegate._reduce(name, skipna=skipna, **kwds)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <StringArray>
['John', 'Jane', nan, 'Michael', 'Sara']
Length: 5, dtype: str
name = 'mean', skipna = True, keepdims = False, axis = 0, kwargs = {}

    def _reduce(
        self,
        name: str,
        *,
        skipna: bool = True,
        keepdims: bool = False,
        axis: AxisInt | None = 0,
        **kwargs,
    ):
        if self.dtype.na_value is np.nan and name in ["any", "all"]:
            if name == "any":
                return nanops.nanany(self._ndarray, skipna=skipna)
            else:
                return nanops.nanall(self._ndarray, skipna=skipna)
    
        if name in ["min", "max", "argmin", "argmax", "sum"]:
            result = getattr(self, name)(skipna=skipna, axis=axis, **kwargs)
            if keepdims:
                return self._from_sequence([result], dtype=self.dtype)
            return result
    
>       raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
E       TypeError: Cannot perform reduction 'mean' with string dtype

/usr/lib/python3.14/site-packages/pandas/core/arrays/string_.py:967: TypeError
______________________ test_feature_scaling_normalisation ______________________

    def test_feature_scaling_normalisation():
        # Test feature scaling using normalisation
        dataset = Dataset('datasets/Abalone', format='csv')
        dataset.load()
>       dataset.scale(method='normalisation')

tests/test_dataset.py:149: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
arm_preprocessing/dataset.py:290: in scale
    self.data[column] - self.data[column].min()
/usr/lib/python3.14/site-packages/pandas/core/ops/common.py:85: in new_method
    return method(self, other)
           ^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/arraylike.py:198: in __sub__
    return self._arith_method(other, operator.sub)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/series.py:6751: in _arith_method
    return base.IndexOpsMixin._arith_method(self, other, op)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/base.py:1644: in _arith_method
    result = ops.arithmetic_op(lvalues, rvalues, op)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/ops/array_ops.py:279: in arithmetic_op
    res_values = op(left, right)
                 ^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/ops/common.py:85: in new_method
    return method(self, other)
           ^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/arraylike.py:198: in __sub__
    return self._arith_method(other, operator.sub)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <StringArray>
['M', 'M', 'F', 'M', 'I', 'I', 'F', 'F', 'M', 'F',
 ...
 'M', 'F', 'M', 'M', 'M', 'F', 'M', 'M', 'F', 'M']
Length: 4177, dtype: str
other = 'F', op = <built-in function sub>

    def _cmp_method(self, other, op):
        from pandas.arrays import (
            ArrowExtensionArray,
            BooleanArray,
        )
    
        if (
            isinstance(other, BaseStringArray)
            and self.dtype.na_value is not libmissing.NA
            and other.dtype.na_value is libmissing.NA
        ):
            # NA has priority of NaN semantics
            return op(self.astype(other.dtype, copy=False), other)
    
        if isinstance(other, ArrowExtensionArray):
            if isinstance(other, BaseStringArray):
                # pyarrow storage has priority over python storage
                # (except if we have NA semantics and other not)
                if not (
                    self.dtype.na_value is libmissing.NA
                    and other.dtype.na_value is not libmissing.NA
                ):
                    return NotImplemented
            else:
                return NotImplemented
    
        if isinstance(other, StringArray):
            other = other._ndarray
    
        mask = isna(self) | isna(other)
        valid = ~mask
    
        if lib.is_list_like(other):
            if len(other) != len(self):
                # prevent improper broadcasting when other is 2D
                raise ValueError(
                    f"Lengths of operands do not match: {len(self)} != {len(other)}"
                )
    
            # for array-likes, first filter out NAs before converting to numpy
            if not is_array_like(other):
                other = np.asarray(other)
            other = other[valid]
    
        other_dtype = getattr(other, "dtype", None)
        if op.__name__.strip("_") in ["mul", "rmul"] and (
            lib.is_bool(other) or lib.is_np_dtype(other_dtype, "b")
        ):
            # GH#62595
            raise TypeError(
                "Cannot multiply StringArray by bools. "
                "Explicitly cast to integers instead."
            )
    
        if op.__name__ in ops.ARITHMETIC_BINOPS:
            result = np.empty_like(self._ndarray, dtype="object")
            result[mask] = self.dtype.na_value
>           result[valid] = op(self._ndarray[valid], other)
                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E           TypeError: unsupported operand type(s) for -: 'str' and 'str'

/usr/lib/python3.14/site-packages/pandas/core/arrays/string_.py:1212: TypeError
_____________________ test_feature_scaling_standardisation _____________________

    def test_feature_scaling_standardisation():
        # Test feature scaling using standardisation
        dataset = Dataset('datasets/Abalone', format='csv')
        dataset.load()
>       dataset.scale(method='standardisation')

tests/test_dataset.py:162: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
arm_preprocessing/dataset.py:294: in scale
    self.data[column] - self.data[column].mean()
                        ^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/util/_decorators.py:336: in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/core/series.py:8113: in mean
    return NDFrame.mean(
/usr/lib/python3.14/site-packages/pandas/core/generic.py:11831: in mean
    return self._stat_function(
/usr/lib/python3.14/site-packages/pandas/core/generic.py:11785: in _stat_function
    return self._reduce(
/usr/lib/python3.14/site-packages/pandas/core/series.py:7480: in _reduce
    result = delegate._reduce(name, skipna=skipna, **kwds)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <StringArray>
['M', 'M', 'F', 'M', 'I', 'I', 'F', 'F', 'M', 'F',
 ...
 'M', 'F', 'M', 'M', 'M', 'F', 'M', 'M', 'F', 'M']
Length: 4177, dtype: str
name = 'mean', skipna = True, keepdims = False, axis = 0, kwargs = {}

    def _reduce(
        self,
        name: str,
        *,
        skipna: bool = True,
        keepdims: bool = False,
        axis: AxisInt | None = 0,
        **kwargs,
    ):
        if self.dtype.na_value is np.nan and name in ["any", "all"]:
            if name == "any":
                return nanops.nanany(self._ndarray, skipna=skipna)
            else:
                return nanops.nanall(self._ndarray, skipna=skipna)
    
        if name in ["min", "max", "argmin", "argmax", "sum"]:
            result = getattr(self, name)(skipna=skipna, axis=axis, **kwargs)
            if keepdims:
                return self._from_sequence([result], dtype=self.dtype)
            return result
    
>       raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
E       TypeError: Cannot perform reduction 'mean' with string dtype

/usr/lib/python3.14/site-packages/pandas/core/arrays/string_.py:967: TypeError
__________________________ test_filter_between_dates ___________________________

    def test_filter_between_dates():
        # Test filtering between dates
        dataset = Dataset(
            'datasets/measures2', format='txt', datetime_columns=['date', 'time']
        )
>       dataset.load()

tests/test_dataset.py:213: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
arm_preprocessing/dataset.py:63: in load
    data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
    return _read(filepath_or_buffer, kwds)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
    self._engine = self._make_engine(f, self.engine)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
    return mapping[engine](f, **self.options)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
    validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]

    def validate_parse_dates_presence(
        parse_dates: bool | list, columns: Sequence[Hashable]
    ) -> set:
        """
        Check if parse_dates are in columns.
    
        If user has provided names for parse_dates, check if those columns
        are available.
    
        Parameters
        ----------
        columns : list
            List of names of the dataframe.
    
        Returns
        -------
        The names of the columns which will get parsed later if a list
        is given as specification.
    
        Raises
        ------
        ValueError
            If column to parse_date is not in dataframe.
    
        """
        if not isinstance(parse_dates, list):
            return set()
    
        missing = set()
        unique_cols = set()
        for col in parse_dates:
            if isinstance(col, str):
                if col not in columns:
                    missing.add(col)
                else:
                    unique_cols.add(col)
            elif col in columns:
                unique_cols.add(col)
            else:
>               unique_cols.add(columns[col])
                                ^^^^^^^^^^^^
E               TypeError: list indices must be integers or slices, not list

/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
____________________________ test_filter_by_minute _____________________________

    def test_filter_by_minute():
        # Test filtering by minute
        dataset = Dataset(
            'datasets/measures2', format='txt', datetime_columns=['date', 'time']
        )
>       dataset.load()

tests/test_dataset.py:243: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
arm_preprocessing/dataset.py:63: in load
    data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
    return _read(filepath_or_buffer, kwds)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
    self._engine = self._make_engine(f, self.engine)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
    return mapping[engine](f, **self.options)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
    validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]

    def validate_parse_dates_presence(
        parse_dates: bool | list, columns: Sequence[Hashable]
    ) -> set:
        """
        Check if parse_dates are in columns.
    
        If user has provided names for parse_dates, check if those columns
        are available.
    
        Parameters
        ----------
        columns : list
            List of names of the dataframe.
    
        Returns
        -------
        The names of the columns which will get parsed later if a list
        is given as specification.
    
        Raises
        ------
        ValueError
            If column to parse_date is not in dataframe.
    
        """
        if not isinstance(parse_dates, list):
            return set()
    
        missing = set()
        unique_cols = set()
        for col in parse_dates:
            if isinstance(col, str):
                if col not in columns:
                    missing.add(col)
                else:
                    unique_cols.add(col)
            elif col in columns:
                unique_cols.add(col)
            else:
>               unique_cols.add(columns[col])
                                ^^^^^^^^^^^^
E               TypeError: list indices must be integers or slices, not list

/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
_____________________________ test_filter_by_hour ______________________________

    def test_filter_by_hour():
        # Test filtering by hour
        dataset = Dataset(
            'datasets/measures2', format='txt', datetime_columns=['date', 'time']
        )
>       dataset.load()

tests/test_dataset.py:266: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
arm_preprocessing/dataset.py:63: in load
    data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
    return _read(filepath_or_buffer, kwds)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
    self._engine = self._make_engine(f, self.engine)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
    return mapping[engine](f, **self.options)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
    validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]

    def validate_parse_dates_presence(
        parse_dates: bool | list, columns: Sequence[Hashable]
    ) -> set:
        """
        Check if parse_dates are in columns.
    
        If user has provided names for parse_dates, check if those columns
        are available.
    
        Parameters
        ----------
        columns : list
            List of names of the dataframe.
    
        Returns
        -------
        The names of the columns which will get parsed later if a list
        is given as specification.
    
        Raises
        ------
        ValueError
            If column to parse_date is not in dataframe.
    
        """
        if not isinstance(parse_dates, list):
            return set()
    
        missing = set()
        unique_cols = set()
        for col in parse_dates:
            if isinstance(col, str):
                if col not in columns:
                    missing.add(col)
                else:
                    unique_cols.add(col)
            elif col in columns:
                unique_cols.add(col)
            else:
>               unique_cols.add(columns[col])
                                ^^^^^^^^^^^^
E               TypeError: list indices must be integers or slices, not list

/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
______________________________ test_filter_by_day ______________________________

    def test_filter_by_day():
        # Test filtering by day
        dataset = Dataset(
            'datasets/measures2', format='txt', datetime_columns=['date', 'time']
        )
>       dataset.load()

tests/test_dataset.py:289: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
arm_preprocessing/dataset.py:63: in load
    data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
    return _read(filepath_or_buffer, kwds)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
    self._engine = self._make_engine(f, self.engine)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
    return mapping[engine](f, **self.options)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
    validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]

    def validate_parse_dates_presence(
        parse_dates: bool | list, columns: Sequence[Hashable]
    ) -> set:
        """
        Check if parse_dates are in columns.
    
        If user has provided names for parse_dates, check if those columns
        are available.
    
        Parameters
        ----------
        columns : list
            List of names of the dataframe.
    
        Returns
        -------
        The names of the columns which will get parsed later if a list
        is given as specification.
    
        Raises
        ------
        ValueError
            If column to parse_date is not in dataframe.
    
        """
        if not isinstance(parse_dates, list):
            return set()
    
        missing = set()
        unique_cols = set()
        for col in parse_dates:
            if isinstance(col, str):
                if col not in columns:
                    missing.add(col)
                else:
                    unique_cols.add(col)
            elif col in columns:
                unique_cols.add(col)
            else:
>               unique_cols.add(columns[col])
                                ^^^^^^^^^^^^
E               TypeError: list indices must be integers or slices, not list

/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
____________________________ test_filter_by_weekday ____________________________

    def test_filter_by_weekday():
        # Test filtering by weekday
        dataset = Dataset(
            'datasets/measures2', format='txt', datetime_columns=['date', 'time']
        )
>       dataset.load()

tests/test_dataset.py:311: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
arm_preprocessing/dataset.py:63: in load
    data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
    return _read(filepath_or_buffer, kwds)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
    self._engine = self._make_engine(f, self.engine)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
    return mapping[engine](f, **self.options)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
    validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]

    def validate_parse_dates_presence(
        parse_dates: bool | list, columns: Sequence[Hashable]
    ) -> set:
        """
        Check if parse_dates are in columns.
    
        If user has provided names for parse_dates, check if those columns
        are available.
    
        Parameters
        ----------
        columns : list
            List of names of the dataframe.
    
        Returns
        -------
        The names of the columns which will get parsed later if a list
        is given as specification.
    
        Raises
        ------
        ValueError
            If column to parse_date is not in dataframe.
    
        """
        if not isinstance(parse_dates, list):
            return set()
    
        missing = set()
        unique_cols = set()
        for col in parse_dates:
            if isinstance(col, str):
                if col not in columns:
                    missing.add(col)
                else:
                    unique_cols.add(col)
            elif col in columns:
                unique_cols.add(col)
            else:
>               unique_cols.add(columns[col])
                                ^^^^^^^^^^^^
E               TypeError: list indices must be integers or slices, not list

/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
_____________________________ test_filter_by_week ______________________________

    def test_filter_by_week():
        # Test filtering by week
        dataset = Dataset(
            'datasets/measures2', format='txt', datetime_columns=['date', 'time']
        )
>       dataset.load()

tests/test_dataset.py:335: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
arm_preprocessing/dataset.py:63: in load
    data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
    return _read(filepath_or_buffer, kwds)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
    self._engine = self._make_engine(f, self.engine)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
    return mapping[engine](f, **self.options)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
    validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]

    def validate_parse_dates_presence(
        parse_dates: bool | list, columns: Sequence[Hashable]
    ) -> set:
        """
        Check if parse_dates are in columns.
    
        If user has provided names for parse_dates, check if those columns
        are available.
    
        Parameters
        ----------
        columns : list
            List of names of the dataframe.
    
        Returns
        -------
        The names of the columns which will get parsed later if a list
        is given as specification.
    
        Raises
        ------
        ValueError
            If column to parse_date is not in dataframe.
    
        """
        if not isinstance(parse_dates, list):
            return set()
    
        missing = set()
        unique_cols = set()
        for col in parse_dates:
            if isinstance(col, str):
                if col not in columns:
                    missing.add(col)
                else:
                    unique_cols.add(col)
            elif col in columns:
                unique_cols.add(col)
            else:
>               unique_cols.add(columns[col])
                                ^^^^^^^^^^^^
E               TypeError: list indices must be integers or slices, not list

/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
_____________________________ test_filter_by_month _____________________________

    def test_filter_by_month():
        # Test filtering by month
        dataset = Dataset(
            'datasets/measures2', format='txt', datetime_columns=['date', 'time']
        )
>       dataset.load()

tests/test_dataset.py:358: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
arm_preprocessing/dataset.py:63: in load
    data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
    return _read(filepath_or_buffer, kwds)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
    self._engine = self._make_engine(f, self.engine)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
    return mapping[engine](f, **self.options)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
    validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]

    def validate_parse_dates_presence(
        parse_dates: bool | list, columns: Sequence[Hashable]
    ) -> set:
        """
        Check if parse_dates are in columns.
    
        If user has provided names for parse_dates, check if those columns
        are available.
    
        Parameters
        ----------
        columns : list
            List of names of the dataframe.
    
        Returns
        -------
        The names of the columns which will get parsed later if a list
        is given as specification.
    
        Raises
        ------
        ValueError
            If column to parse_date is not in dataframe.
    
        """
        if not isinstance(parse_dates, list):
            return set()
    
        missing = set()
        unique_cols = set()
        for col in parse_dates:
            if isinstance(col, str):
                if col not in columns:
                    missing.add(col)
                else:
                    unique_cols.add(col)
            elif col in columns:
                unique_cols.add(col)
            else:
>               unique_cols.add(columns[col])
                                ^^^^^^^^^^^^
E               TypeError: list indices must be integers or slices, not list

/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
_____________________________ test_filter_by_year ______________________________

    def test_filter_by_year():
        # Test filtering by year
        dataset = Dataset(
            'datasets/measures2', format='txt', datetime_columns=['date', 'time']
        )
>       dataset.load()

tests/test_dataset.py:381: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
arm_preprocessing/dataset.py:63: in load
    data = pd.read_csv(
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873: in read_csv
    return _read(filepath_or_buffer, kwds)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300: in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645: in __init__
    self._engine = self._make_engine(f, self.engine)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1922: in _make_engine
    return mapping[engine](f, **self.options)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/lib/python3.14/site-packages/pandas/io/parsers/c_parser_wrapper.py:152: in __init__
    validate_parse_dates_presence(self.parse_dates, self.names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

parse_dates = [['date', 'time']]
columns = ['mp', 'temperature', 'humidity', 'soil', 'light', 'date', ...]

    def validate_parse_dates_presence(
        parse_dates: bool | list, columns: Sequence[Hashable]
    ) -> set:
        """
        Check if parse_dates are in columns.
    
        If user has provided names for parse_dates, check if those columns
        are available.
    
        Parameters
        ----------
        columns : list
            List of names of the dataframe.
    
        Returns
        -------
        The names of the columns which will get parsed later if a list
        is given as specification.
    
        Raises
        ------
        ValueError
            If column to parse_date is not in dataframe.
    
        """
        if not isinstance(parse_dates, list):
            return set()
    
        missing = set()
        unique_cols = set()
        for col in parse_dates:
            if isinstance(col, str):
                if col not in columns:
                    missing.add(col)
                else:
                    unique_cols.add(col)
            elif col in columns:
                unique_cols.add(col)
            else:
>               unique_cols.add(columns[col])
                                ^^^^^^^^^^^^
E               TypeError: list indices must be integers or slices, not list

/usr/lib/python3.14/site-packages/pandas/io/parsers/base_parser.py:914: TypeError
=============================== warnings summary ===============================
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:64
  /usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:64: PyparsingDeprecationWarning: 'oneOf' deprecated - use 'one_of'
    prop = Group((name + Suppress("=") + comma_separated(value)) | oneOf(_CONSTANTS))

../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:85
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:85
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:85
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:85
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:85
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:85
  /usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:85: PyparsingDeprecationWarning: 'parseString' deprecated - use 'parse_string'
    parse = parser.parseString(pattern)

../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:89
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:89
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:89
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:89
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:89
../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:89
  /usr/lib/python3.14/site-packages/matplotlib/_fontconfig_pattern.py:89: PyparsingDeprecationWarning: 'resetCache' deprecated - use 'reset_cache'
    parser.resetCache()

../../../../../../../usr/lib/python3.14/site-packages/matplotlib/_mathtext.py:45
  /usr/lib/python3.14/site-packages/matplotlib/_mathtext.py:45: PyparsingDeprecationWarning: 'enablePackrat' deprecated - use 'enable_packrat'
    ParserElement.enablePackrat()

-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
=========================== short test summary info ============================
FAILED tests/test_dataset.py::test_load_data_txt_datetime - TypeError: list i...
FAILED tests/test_dataset.py::test_identify_dataset_timeseries - TypeError: l...
FAILED tests/test_dataset.py::test_identify_dataset_mixed - AssertionError: a...
FAILED tests/test_dataset.py::test_identify_dataset_categorical - AssertionEr...
FAILED tests/test_dataset.py::test_missing_values_impute - TypeError: Cannot ...
FAILED tests/test_dataset.py::test_feature_scaling_normalisation - TypeError:...
FAILED tests/test_dataset.py::test_feature_scaling_standardisation - TypeErro...
FAILED tests/test_dataset.py::test_filter_between_dates - TypeError: list ind...
FAILED tests/test_dataset.py::test_filter_by_minute - TypeError: list indices...
FAILED tests/test_dataset.py::test_filter_by_hour - TypeError: list indices m...
FAILED tests/test_dataset.py::test_filter_by_day - TypeError: list indices mu...
FAILED tests/test_dataset.py::test_filter_by_weekday - TypeError: list indice...
FAILED tests/test_dataset.py::test_filter_by_week - TypeError: list indices m...
FAILED tests/test_dataset.py::test_filter_by_month - TypeError: list indices ...
FAILED tests/test_dataset.py::test_filter_by_year - TypeError: list indices m...
================== 15 failed, 20 passed, 14 warnings in 1.78s ==================
>>> ERROR: py3-arm-preprocessing: check failed

Metadata

Assignees

No one assigned

    Labels

    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions