无监督算法有例子吗?

机器学习
标签: #<Tag:0x00007f4914008250>

(akalanala) #1

例如,K均值聚类 (v1) 模块该如何使用?


(达达) #2

参考这个帖子


(akalanala) #3

这里的例子都是有监督学习,都是分类、回归。我想要的是聚类的例子。


(达达) #4

流程是类似的,看你怎么构建和使用因子

克隆策略

    {"Description":"实验创建于2017/8/26","Summary":"","Graph":{"EdgesInternal":[{"DestinationInputPortId":"-196:instruments","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8:data"},{"DestinationInputPortId":"-250:options_data","SourceOutputPortId":"-184:predictions"},{"DestinationInputPortId":"-219:input_data","SourceOutputPortId":"-196:data"},{"DestinationInputPortId":"-254:input_data","SourceOutputPortId":"-219:data"},{"DestinationInputPortId":"-219:features","SourceOutputPortId":"-227:data"},{"DestinationInputPortId":"-239:features","SourceOutputPortId":"-227:data"},{"DestinationInputPortId":"-184:features","SourceOutputPortId":"-227:data"},{"DestinationInputPortId":"-257:input_data","SourceOutputPortId":"-239:data"},{"DestinationInputPortId":"-239:input_data","SourceOutputPortId":"-248:data"},{"DestinationInputPortId":"-250:history_ds","SourceOutputPortId":"-248:data"},{"DestinationInputPortId":"-184:training_ds","SourceOutputPortId":"-254:data"},{"DestinationInputPortId":"-184:predict_ds","SourceOutputPortId":"-257:data"},{"DestinationInputPortId":"-248:instruments","SourceOutputPortId":"-260:data"},{"DestinationInputPortId":"-250:instruments","SourceOutputPortId":"-260:data"}],"ModuleNodes":[{"Id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2010-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2015-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"000300.HIX\n000905.HIX\n000906.HIX\n399006.ZIX\n399005.ZIX","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":"0","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"287d2cb0-f53c-4101-bdf8-1
04b137c8601-8"}],"OutputPortsInternal":[{"Name":"data","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":1,"Comment":"","CommentCollapsed":true},{"Id":"-250","ModuleId":"BigQuantSpace.trade.trade-v4","ModuleParameters":[{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"handle_data","Value":"# 回测引擎:每日数据处理函数,每天执行一次\ndef bigquant_run(context, data):\n # 按日期过滤得到今日的预测数据\n prediction = context.prediction[\n context.prediction.date == data.current_dt.strftime('%Y-%m-%d')]\n sid = context.symbol('000906.HIX')\n if prediction.pred_label.values.sum()>1:\n \n context.order_target_percent(sid,1)\n else:\n context.order_target_percent(sid,0)","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"prepare","Value":"# 回测引擎:准备数据,只执行一次\ndef bigquant_run(context):\n pass\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"initialize","Value":"# 回测引擎:初始化函数,只执行一次\ndef bigquant_run(context):\n # 加载预测数据\n context.prediction = context.options['data'].read_df()\n\n # 系统已经设置了默认的交易手续费和滑点,要修改手续费可使用如下函数\n context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))\n # 预测数据,通过options传入进来,使用 read_df 函数,加载到内存 (DataFrame)\n # 设置买入的股票数量,这里买入预测股票列表排名靠前的5只\n stock_count = 5\n # 每只的股票的权重,如下的权重分配会使得靠前的股票分配多一点的资金,[0.339160, 0.213986, 0.169580, ..]\n context.stock_weights = T.norm([1 / math.log(i + 2) for i in range(0, stock_count)])\n # 设置每只股票占用的最大资金比例\n context.max_cash_per_instrument = 0.2\n context.options['hold_days'] = 
5\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_trading_start","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"volume_limit","Value":0.025,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"order_price_field_buy","Value":"open","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"order_price_field_sell","Value":"close","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"capital_base","Value":1000000,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"auto_cancel_non_tradable_orders","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"data_frequency","Value":"daily","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"price_type","Value":"后复权","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"product_type","Value":"股票","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"plot_charts","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"backtest_only","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"benchmark","Value":"000300.SHA","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-250"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"options_data","NodeId":"-250"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"history_ds","NodeId":"-250"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"benchmark_ds","NodeId":"-250"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"trading_calendar","NodeId":"-250"}],"OutputPortsInternal":[{"Name":"raw_perf","NodeId":"-250","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":19,"Comment":"","CommentCollapsed":true},{"Id":"-184","ModuleId":"BigQuantSpace.cluster_kmeans.cluster_kmeans-v1","ModuleParameters":[{
"Name":"n_clusters","Value":"2","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"init","Value":"k-means++","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"n_init","Value":"3","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"iterations","Value":300,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"algorithm","Value":"auto","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"key_cols","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"workers","Value":1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"other_train_parameters","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"training_ds","NodeId":"-184"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-184"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"model","NodeId":"-184"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"predict_ds","NodeId":"-184"}],"OutputPortsInternal":[{"Name":"output_model","NodeId":"-184","OutputType":null},{"Name":"transform_trainds","NodeId":"-184","OutputType":null},{"Name":"predictions","NodeId":"-184","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":2,"Comment":"","CommentCollapsed":true},{"Id":"-196","ModuleId":"BigQuantSpace.use_datasource.use_datasource-v1","ModuleParameters":[{"Name":"datasource_id","Value":"bar1d_index_CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-196"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","Node
Id":"-196"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-196","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":4,"Comment":"","CommentCollapsed":true},{"Id":"-219","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-219"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-219"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-219","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":3,"Comment":"","CommentCollapsed":true},{"Id":"-227","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"\n# #号开始的表示注释,注释需单独一行\n# 
多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征\nclose/shift(close,120)-1\nsum(volume,120)\n","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-227"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-227","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":5,"Comment":"","CommentCollapsed":true},{"Id":"-239","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-239"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-239"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-239","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":7,"Comment":"","CommentCollapsed":true},{"Id":"-248","ModuleId":"BigQuantSpace.use_datasource.use_datasource-v1","ModuleParameters":[{"Name":"datasource_id","Value":"bar1d_index_CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-248"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-248"}],"OutputPortsInt
ernal":[{"Name":"data","NodeId":"-248","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":8,"Comment":"","CommentCollapsed":true},{"Id":"-254","ModuleId":"BigQuantSpace.dropnan.dropnan-v1","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-254"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-254","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":9,"Comment":"","CommentCollapsed":true},{"Id":"-257","ModuleId":"BigQuantSpace.dropnan.dropnan-v1","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-257"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-257","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":10,"Comment":"","CommentCollapsed":true},{"Id":"-260","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2015-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2017-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"000300.HIX\n000905.HIX\n000906.HIX\n399006.ZIX\n399005.ZIX","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":"0","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"-260"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-260","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":11,"Comment":"","CommentCollapsed":true}],"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions><NodePosition 
Node='287d2cb0-f53c-4101-bdf8-104b137c8601-8' Position='420.40606689453125,175.0761947631836,200,200'/><NodePosition Node='-250' Position='1071.5977172851562,796.5230102539062,200,200'/><NodePosition Node='-184' Position='770.023681640625,663.933349609375,200,200'/><NodePosition Node='-196' Position='425.8695068359375,305.2118225097656,200,200'/><NodePosition Node='-219' Position='580.6478271484375,418.1089172363281,200,200'/><NodePosition Node='-227' Position='746.3517456054688,290.6444091796875,200,200'/><NodePosition Node='-239' Position='1022.68310546875,415.79510498046875,200,200'/><NodePosition Node='-248' Position='1049.9968872070312,290.15155029296875,200,200'/><NodePosition Node='-254' Position='637.0963745117188,523.7223205566406,200,200'/><NodePosition Node='-257' Position='1002.6530151367188,526.8712463378906,200,200'/><NodePosition Node='-260' Position='1039.0707397460938,176.40421295166016,200,200'/></NodePositions><NodeGroups /></DataV1>"},"IsDraft":true,"ParentExperimentId":null,"WebService":{"IsWebServiceExperiment":false,"Inputs":[],"Outputs":[],"Parameters":[{"Name":"交易日期","Value":"","ParameterDefinition":{"Name":"交易日期","FriendlyName":"交易日期","DefaultValue":"","ParameterType":"String","HasDefaultValue":true,"IsOptional":true,"ParameterRules":[],"HasRules":false,"MarkupType":0,"CredentialDescriptor":null}}],"WebServiceGroupId":null,"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions></NodePositions><NodeGroups /></DataV1>"},"DisableNodesUpdate":false,"Category":"user","Tags":[],"IsPartialRun":true}
    In [59]:
    # 本代码由可视化策略环境自动生成 2019年3月21日 17:57
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
    
    
    # Backtest engine: daily bar handler, invoked once per trading day.
    def m19_handle_data_bigquant_run(context, data):
        """Go fully long or fully flat on 000906.HIX from today's cluster labels.

        Args:
            context: backtest context. Reads ``context.prediction`` (indexed by
                its ``date`` and ``pred_label`` columns) and places the order
                through ``context.order_target_percent``.
            data: bar data for the current day; only ``data.current_dt`` is used.
        """
        # Keep only the prediction rows whose date matches the current bar.
        today = data.current_dt.strftime('%Y-%m-%d')
        predictions = context.prediction
        todays_rows = predictions[predictions.date == today]

        sid = context.symbol('000906.HIX')

        # Target 100% when the day's labels add up to more than 1, else 0%.
        label_total = todays_rows.pred_label.values.sum()
        target_percent = 1 if label_total > 1 else 0
        context.order_target_percent(sid, target_percent)
    # Backtest engine: data-preparation hook, invoked once.
    def m19_prepare_bigquant_run(context):
        """Intentionally a no-op: nothing is prepared and ``context`` is untouched."""
        return None
    
    # Backtest engine: initialisation hook, invoked once.
    def m19_initialize_bigquant_run(context):
        """Load the predictions and set commission and position-sizing values.

        Args:
            context: backtest context. ``context.options['data']`` must expose
                ``read_df()``; its result is stored on ``context.prediction``.
        """
        # Predictions are passed in through options; read_df() loads them
        # into memory (a DataFrame, per the generator's own comment).
        context.prediction = context.options['data'].read_df()

        # The platform already applies a default commission and slippage;
        # this call overrides the commission.
        commission = PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5)
        context.set_commission(commission)

        # NOTE(review): the sizing values below are not read by the
        # handle_data callback in this listing - they look like template
        # leftovers; confirm before relying on them.
        # Number of stocks to buy: the top 5 of the ranked prediction list.
        top_n = 5
        # Per-stock weights; earlier ranks receive a larger share,
        # e.g. [0.339160, 0.213986, 0.169580, ..]
        raw_weights = [1 / math.log(rank + 2) for rank in range(top_n)]
        context.stock_weights = T.norm(raw_weights)
        # Upper bound on the fraction of capital held in one instrument.
        context.max_cash_per_instrument = 0.2
        context.options['hold_days'] = 5
    
    
    # Instrument list for the 2010-01-01 .. 2015-01-01 window; feeds the
    # branch that ends up as the k-means training_ds. Five index codes.
    m1 = M.instruments.v2(
        start_date='2010-01-01',
        end_date='2015-01-01',
        market='CN_STOCK_A',
        instrument_list="""000300.HIX
    000905.HIX
    000906.HIX
    399006.ZIX
    399005.ZIX""",
        max_count=0
    )
    
    # Read the 'bar1d_index_CN_STOCK_A' datasource for the m1 instruments.
    # Empty start/end dates - presumably the range is taken from m1; confirm
    # against the platform docs.
    m4 = M.use_datasource.v1(
        instruments=m1.data,
        datasource_id='bar1d_index_CN_STOCK_A',
        start_date='',
        end_date=''
    )
    
    # Feature expressions shared by both extractors (m3, m7) and by the
    # k-means module (m2): close/shift(close,120)-1 and sum(volume,120).
    m5 = M.input_features.v1(
        features="""
    # #号开始的表示注释,注释需单独一行
    # 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征
    close/shift(close,120)-1
    sum(volume,120)
    """
    )
    
    # Evaluate the m5 expressions on the m4 data (training branch).
    # drop_na=False here; NaN handling is left to the dropnan step below.
    m3 = M.derived_feature_extractor.v3(
        input_data=m4.data,
        features=m5.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions={}
    )
    
    # dropnan on the training features - presumably removes rows containing
    # NaN (e.g. from the 120-bar lookback); confirm.
    m9 = M.dropnan.v1(
        input_data=m3.data
    )
    
    # Instrument list for the 2015-01-01 .. 2017-01-01 window (prediction and
    # backtest branch); same five index codes as m1.
    m11 = M.instruments.v2(
        start_date='2015-01-01',
        end_date='2017-01-01',
        market='CN_STOCK_A',
        instrument_list="""000300.HIX
    000905.HIX
    000906.HIX
    399006.ZIX
    399005.ZIX""",
        max_count=0
    )
    
    # Same datasource as m4, read for the m11 instruments. Also reused as
    # history_ds by the backtest (m19).
    m8 = M.use_datasource.v1(
        instruments=m11.data,
        datasource_id='bar1d_index_CN_STOCK_A',
        start_date='',
        end_date=''
    )
    
    # Evaluate the m5 expressions on the m8 data (prediction branch); same
    # settings as m3.
    m7 = M.derived_feature_extractor.v3(
        input_data=m8.data,
        features=m5.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions={}
    )
    
    # dropnan on the prediction features (mirrors m9).
    m10 = M.dropnan.v1(
        input_data=m7.data
    )
    
    # K-means clustering with 2 clusters: training_ds is m9, predict_ds is
    # m10, features come from m5. Its `predictions` output feeds the backtest.
    m2 = M.cluster_kmeans.v1(
        training_ds=m9.data,
        features=m5.data,
        predict_ds=m10.data,
        n_clusters=2,
        init='k-means++',
        n_init=3,
        iterations=300,
        algorithm='auto',
        key_cols='date',
        workers=1,
        other_train_parameters={}
    )
    
    # Backtest: m2.predictions goes in as options_data, and the three m19_*
    # functions defined earlier in this listing are the engine callbacks.
    # NOTE(review): benchmark '000300.SHA' uses a different suffix from the
    # '000300.HIX' code in the instrument lists - confirm this is intended.
    m19 = M.trade.v4(
        instruments=m11.data,
        options_data=m2.predictions,
        history_ds=m8.data,
        start_date='',
        end_date='',
        handle_data=m19_handle_data_bigquant_run,
        prepare=m19_prepare_bigquant_run,
        initialize=m19_initialize_bigquant_run,
        volume_limit=0.025,
        order_price_field_buy='open',
        order_price_field_sell='close',
        capital_base=1000000,
        auto_cancel_non_tradable_orders=True,
        data_frequency='daily',
        price_type='后复权',
        product_type='股票',
        plot_charts=True,
        backtest_only=False,
        benchmark='000300.SHA'
    )
    
    • 收益率: -12.73%
    • 年化收益率: -6.79%
    • 基准收益率: -6.33%
    • 阿尔法: -0.08
    • 贝塔: 0.27
    • 夏普比率: -0.48
    • 胜率: 0.0
    • 盈亏比: 0.0
    • 收益波动率: 17.43%
    • 信息比率: -0.02
    • 最大回撤: 28.19%

    (b31e9eaf4cb586bb5f0e) #5

    大神,您好!请问一下k均值聚类三个输出端输出的模型、预测数据、训练数据集分别是怎样的?有方法能查看输出的结果吗?


    (mi10) #6

    同问,聚类的例子好少