问答交流

【代码报错】ValueError: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data)

由hiai创建,最终由small_q 被浏览 5 用户

行业中性化

在复现行业中性化的代码报错

## 加载包
import dai
import pandas as pd
import numpy as np
import math
import warnings
from datetime import datetime, timedelta

from bigmodule import M
from bigtrader.finance.commission import PerOrder

niu_date= '2024-11-10'
today = datetime.now().date().strftime("%Y-%m-%d")

sql="""
SELECT stock.date, stock.instrument,volume, turn, industry_instrument as indst
FROM cn_stock_bar1d AS stock
JOIN cn_stock_industry_component AS indst
USING(instrument,date) 
"""
#QUALIFY factor = 1
data = dai.query(sql, filters={'date':[niu_date, today]}).df()
ind_dummies = pd.get_dummies(data['indst'],prefix='indst')
print(ind_dummies) #返回布尔值?
regression_data = pd.concat([data[['indst','turn']],ind_dummies], axis=1)
regression_data = regression_data.replace([np.inf, -np.inf],np.nan).dropna()
x = regression_data.drop(['indst','turn'], axis=1)

import statsmodels.api as stats
x = stats.add_constant(x) #添加截距项
result = stats.OLS(regression_data['turn'], x).fit()
df['purturn'] = result.resid

视频中dummies方法返回的0,1,但是平台运行得到的print结果是True False,不太确定是因为这个原因导致的报错

报错原因如下:


ValueError Traceback (most recent call last) Cell In[39], line 31 29 import statsmodels.api as stats 30 x = stats.add_constant(x) #添加截距项 ---> 31 result = stats.OLS(regression_data['turn'], x).fit() 32 df['purturn'] = result.resid

File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/statsmodels/regression/linear_model.py:924, in OLS.init(self, endog, exog, missing, hasconst, **kwargs) 921 msg = ("Weights are not supported in OLS and will be ignored" 922 "An exception will be raised in the next version.") 923 warnings.warn(msg, ValueWarning) --> 924 super().init(endog, exog, missing=missing, 925 hasconst=hasconst, **kwargs) 926 if "weights" in self._init_keys: 927 self._init_keys.remove("weights")

File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/statsmodels/regression/linear_model.py:749, in WLS.init(self, endog, exog, weights, missing, hasconst, **kwargs) 747 else: 748 weights = weights.squeeze() --> 749 super().init(endog, exog, missing=missing, 750 weights=weights, hasconst=hasconst, **kwargs) 751 nobs = self.exog.shape[0] 752 weights = self.weights

File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/statsmodels/regression/linear_model.py:203, in RegressionModel.init(self, endog, exog, **kwargs) 202 def init(self, endog, exog, **kwargs): --> 203 super().init(endog, exog, **kwargs) 204 self.pinv_wexog: Float64Array | None = None 205 self._data_attr.extend(['pinv_wexog', 'wendog', 'wexog', 'weights'])

File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/statsmodels/base/model.py:270, in LikelihoodModel.init(self, endog, exog, **kwargs) 269 def init(self, endog, exog=None, **kwargs): --> 270 super().init(endog, exog, **kwargs) 271 self.initialize()

File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/statsmodels/base/model.py:95, in Model.init(self, endog, exog, **kwargs) 93 missing = kwargs.pop('missing', 'none') 94 hasconst = kwargs.pop('hasconst', None) ---> 95 self.data = self._handle_data(endog, exog, missing, hasconst, 96 **kwargs) 97 self.k_constant = self.data.k_constant 98 self.exog = self.data.exog

File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/statsmodels/base/model.py:135, in Model._handle_data(self, endog, exog, missing, hasconst, **kwargs) 134 def _handle_data(self, endog, exog, missing, hasconst, **kwargs): --> 135 data = handle_data(endog, exog, missing, hasconst, **kwargs) 136 # kwargs arrays could have changed, easier to just attach here 137 for key in kwargs:

File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/statsmodels/base/data.py:675, in handle_data(endog, exog, missing, hasconst, **kwargs) 672 exog = np.asarray(exog) 674 klass = handle_data_class_factory(endog, exog) --> 675 return klass(endog, exog=exog, missing=missing, hasconst=hasconst, 676 **kwargs)

File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/statsmodels/base/data.py:84, in ModelData.init(self, endog, exog, missing, hasconst, **kwargs) 82 self.orig_endog = endog 83 self.orig_exog = exog ---> 84 self.endog, self.exog = self._convert_endog_exog(endog, exog) 86 self.const_idx = None 87 self.k_constant = 0

File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/statsmodels/base/data.py:509, in PandasData._convert_endog_exog(self, endog, exog) 507 exog = exog if exog is None else np.asarray(exog) 508 if endog.dtype == object or exog is not None and exog.dtype == object: --> 509 raise ValueError("Pandas data cast to numpy dtype of object. " 510 "Check input data with np.asarray(data).") 511 return super()._convert_endog_exog(endog, exog)

ValueError: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data).

标签

Python数据处理
评论
  • 不方便提供完整代码么,你的代码报错似乎不在你公开的代码中
{link}