数据过滤表达式中有没有查找函数

用户成长系列
标签: #<Tag:0x00007fc83b353710>

(tpda) #1

数据过滤表达式中有没有查找函数?比如 SQL 里面的 find_in_str。例如想在字符串“机构买入2次”中查找是否有“机构”,有就返回真。


(topnheran) #2

这个需要自己做处理,比如在自定义模块中使用代码 df[df.xxx.str.contains('机构买入2次')]


(iQuant) #3

参考下面的例子模块

克隆策略

    {"Description":"实验创建于2020/2/10","Summary":"","Graph":{"EdgesInternal":[{"DestinationInputPortId":"-210:input_1","SourceOutputPortId":"-159:data"},{"DestinationInputPortId":"-159:instruments","SourceOutputPortId":"-165:data"}],"ModuleNodes":[{"Id":"-159","ModuleId":"BigQuantSpace.use_datasource.use_datasource-v1","ModuleParameters":[{"Name":"datasource_id","Value":"industry_CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-159"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-159"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-159","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":1,"Comment":"","CommentCollapsed":true},{"Id":"-165","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2019-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2019-10-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":0,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"-165"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-165","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":2,"Comment":"","CommentCollapsed":true},{"Id":"-198","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"\n# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef 
bigquant_run(input_1, concept_str):\n # 示例代码如下。在这里编写您的代码\n df = input_1.read_df()#读取上一个模块的数据\n ins= list(df.instrument.unique())\n start = df.date.min().strftime('%Y-%m-%d')\n end = df.date.max().strftime('%Y-%m-%d')\n concepts = set(concept_str.split(';'))\n #获取股票的概念字段\n df_concept=DataSource('industry_CN_STOCK_A').read(instruments=ins, start_date=start, end_date=end,\n fields=['concept'])\n df_merge=pd.merge(df,df_concept,on=['instrument','date'])\n \n def judge(x):\n if x==None:\n return False\n \n result = False\n try:\n for s in concepts:\n for q in x.split(';'):\n if s in q:\n result = True\n except:\n result = False\n return result\n \n #取出有智能家居概念的股票\n df_filter=df_merge[df_merge['concept'].apply(judge)] #获取智能家居概念包含的股票\n data_1 = DataSource.write_df(df_filter)\n return Outputs(data_1=data_1, data_2=None, data_3=None)","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"input_1","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{\n 'concept_str': 
'医疗;5G'\n}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"data_1","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-198"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-198"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-198"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-198","OutputType":null},{"Name":"data_2","NodeId":"-198","OutputType":null},{"Name":"data_3","NodeId":"-198","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":6,"Comment":"","CommentCollapsed":true},{"Id":"-210","ModuleId":"BigQuantSpace.column_str_filter.column_str_filter-v2","ModuleParameters":[{"Name":"col","Value":"concept","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"substr","Value":"5G;医疗","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-210"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-210","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":5,"Comment":"","CommentCollapsed":true}],"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions><NodePosition Node='-159' Position='273,162.1999969482422,200,200'/><NodePosition Node='-165' Position='176,56.19999694824219,200,200'/><NodePosition Node='-198' Position='492,283.5,200,200'/><NodePosition Node='-210' Position='153,302.79998779296875,200,200'/></NodePositions><NodeGroups 
/></DataV1>"},"IsDraft":true,"ParentExperimentId":null,"WebService":{"IsWebServiceExperiment":false,"Inputs":[],"Outputs":[],"Parameters":[{"Name":"交易日期","Value":"","ParameterDefinition":{"Name":"交易日期","FriendlyName":"交易日期","DefaultValue":"","ParameterType":"String","HasDefaultValue":true,"IsOptional":true,"ParameterRules":[],"HasRules":false,"MarkupType":0,"CredentialDescriptor":null}}],"WebServiceGroupId":null,"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions></NodePositions><NodeGroups /></DataV1>"},"DisableNodesUpdate":false,"Category":"user","Tags":[],"IsPartialRun":true}
    In [36]:
    # 本代码由可视化策略环境自动生成 2020年2月10日 10:43
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
    
    
    
    # Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
    def m6_run_bigquant_run(input_1, concept_str):
        """Keep only the rows whose ``concept`` field mentions a requested keyword.

        Args:
            input_1: Upstream data source; must provide ``read_df()`` returning a
                DataFrame with ``instrument`` and datetime-like ``date`` columns.
            concept_str: Semicolon-separated keywords, e.g. ``'医疗;5G'``. Blank
                entries (such as the one left by a trailing ``;``) are ignored.

        Returns:
            ``Outputs`` whose ``data_1`` is the filtered DataFrame written through
            ``DataSource.write_df``; ``data_2`` and ``data_3`` are ``None``.
        """
        # Read the DataFrame produced by the upstream module.
        df = input_1.read_df()
        ins = list(df.instrument.unique())
        start = df.date.min().strftime('%Y-%m-%d')
        end = df.date.max().strftime('%Y-%m-%d')
        # Drop empty keywords: '' is a substring of every string, so a stray
        # ';' would otherwise make the filter accept every row.
        concepts = {kw for kw in concept_str.split(';') if kw}
        # Fetch the concept field for the same instruments and date range.
        df_concept = DataSource('industry_CN_STOCK_A').read(
            instruments=ins, start_date=start, end_date=end, fields=['concept'])
        # NOTE(review): if the input already has a 'concept' column, this merge
        # yields 'concept_x'/'concept_y' and the lookup below raises KeyError —
        # confirm the expected input schema.
        df_merge = pd.merge(df, df_concept, on=['instrument', 'date'])

        def judge(x):
            # Non-string cells (e.g. None or NaN) carry no concept: no match.
            if not isinstance(x, str):
                return False
            # Keywords never contain ';' (they were split on it), so a substring
            # test on the whole field equals testing each ';'-separated segment.
            return any(kw in x for kw in concepts)

        # Keep the rows whose concept field contains at least one keyword.
        df_filter = df_merge[df_merge['concept'].apply(judge)]
        data_1 = DataSource.write_df(df_filter)
        return Outputs(data_1=data_1, data_2=None, data_3=None)
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m6_post_run_bigquant_run(outputs):
        """Optional post-processing hook; hands the main function's outputs back as-is.

        Args:
            outputs: The value returned by the module's main run function.

        Returns:
            The same ``outputs`` object, unmodified.
        """
        return outputs
    
    
    # Instruments module for market 'CN_STOCK_A' over 2019-01-01 .. 2019-10-01.
    # NOTE(review): max_count=0 presumably means "no limit" — confirm against
    # the platform documentation.
    m2 = M.instruments.v2(
        start_date='2019-01-01',
        end_date='2019-10-01',
        market='CN_STOCK_A',
        instrument_list='',
        max_count=0
    )
    
    # Read the 'industry_CN_STOCK_A' datasource for the instruments from m2.
    # start_date/end_date are left empty here.
    m1 = M.use_datasource.v1(
        instruments=m2.data,
        datasource_id='industry_CN_STOCK_A',
        start_date='',
        end_date=''
    )
    
    # Built-in string filter on the 'concept' column of m1's data, using the
    # ';'-separated substrings '5G' and '医疗'.
    # NOTE(review): m_cached=False presumably disables result caching — confirm.
    m5 = M.column_str_filter.v2(
        input_1=m1.data,
        col='concept',
        substr='5G;医疗',
        m_cached=False
    )
    
    # Custom-code module running m6_run_bigquant_run with concept_str='医疗;5G'.
    # NOTE(review): no input_1 is passed here although m6_run_bigquant_run calls
    # input_1.read_df() — confirm how this module is meant to be wired before
    # running it.
    m6 = M.cached.v3(
        run=m6_run_bigquant_run,
        post_run=m6_post_run_bigquant_run,
        input_ports='input_1',
        params="""{
        'concept_str': '医疗;5G'
    }""",
        output_ports='data_1'
    )