pandas: FloatingPointError con np.seterr(all='raise') y datos faltantes

Obtengo un FloatingPointError cuando intento mostrar una Series que contiene datos faltantes (NaN).

import numpy as np import pandas as pd np.seterr(all='raise') s = pd.Series([np.nan,np.nan,np.nan],index=[1,2,3]); print(s); print(s.head()) 

Estoy usando la versión más reciente de pandas, instalada mediante

 conda install -f pandas 

después de ejecutar pkill python y conda remove pandas.

Aquí está el traceback (la traza del error):

 Out[4]: --------------------------------------------------------------------------- FloatingPointError Traceback (most recent call last) /home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/IPython/core/formatters.pyc in __call__(self, obj) 695 type_pprinters=self.type_printers, 696 deferred_pprinters=self.deferred_printers) --> 697 printer.pretty(obj) 698 printer.flush() 699 return stream.getvalue() /home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj) 381 if callable(meth): 382 return meth(obj, self, cycle) --> 383 return _default_pprint(obj, self, cycle) 384 finally: 385 self.end_group() /home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/IPython/lib/pretty.pyc in _default_pprint(obj, p, cycle) 501 if _safe_getattr(klass, '__repr__', None) not in _baseclass_reprs: 502 # A user-provided repr. Find newlines and replace them with p.break_() --> 503 _repr_pprint(obj, p, cycle) 504 return 505 p.begin_group(1, ' 685 output = repr(obj) 686 for idx,output_line in enumerate(output.splitlines()): 687 if idx: /home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/base.pyc in __repr__(self) 61 Yields Bytestring in Py2, Unicode String in py3. 
62 """ ---> 63 return str(self) 64 65 /home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/base.pyc in __str__(self) 41 if compat.PY3: 42 return self.__unicode__() ---> 43 return self.__bytes__() 44 45 def __bytes__(self): /home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/base.pyc in __bytes__(self) 53 54 encoding = get_option("display.encoding") ---> 55 return self.__unicode__().encode(encoding, 'replace') 56 57 def __repr__(self): /home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/series.pyc in __unicode__(self) 954 955 self.to_string(buf=buf, name=self.name, dtype=self.dtype, --> 956 max_rows=max_rows) 957 result = buf.getvalue() 958 /home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/series.pyc in to_string(self, buf, na_rep, float_format, header, length, dtype, name, max_rows) 992 the_repr = self._get_repr(float_format=float_format, na_rep=na_rep, 993 header=header, length=length, dtype=dtype, --> 994 name=name, max_rows=max_rows) 995 996 # catch contract violations /home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/series.pyc in _get_repr(self, name, header, length, dtype, na_rep, float_format, max_rows) 1022 float_format=float_format, 1023 max_rows=max_rows) -> 1024 result = formatter.to_string() 1025 1026 # TODO: following check prob. not neces. 
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/format.pyc in to_string(self) 223 224 fmt_index, have_header = self._get_formatted_index() --> 225 fmt_values = self._get_formatted_values() 226 227 maxlen = max(self.adj.len(x) for x in fmt_index) # max index len /home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/format.pyc in _get_formatted_values(self) 213 return format_array(self.tr_series._values, None, 214 float_format=self.float_format, --> 215 na_rep=self.na_rep) 216 217 def to_string(self): /home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/format.pyc in format_array(values, formatter, float_format, na_rep, digits, space, justify) 1974 justify=justify) 1975 -> 1976 return fmt_obj.get_result() 1977 1978 /home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/format.pyc in get_result(self) 1990 1991 def get_result(self): -> 1992 fmt_values = self._format_strings() 1993 return _make_fixed_width(fmt_values, self.justify) 1994 /home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/format.pyc in _format_strings(self) 2085 2086 # this is pretty arbitrary for now -> 2087 has_large_values = (abs_vals > 1e8).any() 2088 has_small_values = ((abs_vals  0)).any() FloatingPointError: invalid value encountered in greater 

Cada vez que se ejecuta import pandas, el tratamiento de todos los errores de punto flotante de numpy se configura en 'ignore'. Actualmente, este comportamiento no está documentado.

Esto se hace en pandas/compat/numpy_compat.py:

 # TODO: HACK for NumPy 1.5.1 to suppress warnings # is this necessary? try: np.seterr(all='ignore') except Exception: # pragma: no cover pass 

Así es como se manifiesta en la práctica:

 In [1]: import numpy as np In [2]: np.geterr() Out[2]: {'divide': 'warn', 'invalid': 'warn', 'over': 'warn', 'under': 'ignore'} In [3]: import pandas as pd In [4]: np.geterr() Out[4]: {'divide': 'ignore', 'invalid': 'ignore', 'over': 'ignore', 'under': 'ignore'} In [5]: s = pd.Series([np.nan,np.nan,np.nan],index=[1,2,3]); print(s); print(s.head()) 1 NaN 2 NaN 3 NaN dtype: float64 1 NaN 2 NaN 3 NaN dtype: float64 In [6]: np.seterr(invalid='raise') Out[6]: {'divide': 'ignore', 'invalid': 'ignore', 'over': 'ignore', 'under': 'ignore'} In [7]: s = pd.Series([np.nan,np.nan,np.nan],index=[1,2,3]); print(s); print(s.head()) FloatingPointError: invalid value encountered in greater 

La “solución” es, por lo tanto, no usar np.seterr(invalid='raise') siempre que se use pandas (especialmente cuando se trabaje con datos faltantes).