VW(Vowpal Wabbit)源码阅读笔记

global_data.h

// Aggregate of per-instance state: parser and shared-data pointers, the learner
// pointers, parsed command-line options, feature-generation settings, output
// hooks and runtime accounting variables. Only the declaration is shown here;
// the constructor and learn() are defined elsewhere.
// NOTE(review): member order determines layout and initialisation order --
// do not reorder fields without checking the constructor.
struct vw
{ shared_data* sd;
  parser* p;
  // Handle type for parse_thread is platform-specific: pthread_t unless _WIN32
  // is defined, HANDLE otherwise.
#ifndef _WIN32
  pthread_t parse_thread;
#else
  HANDLE parse_thread;
#endif
  AllReduceType all_reduce_type;
  AllReduce* all_reduce;

  LEARNER::base_learner* l;// the top level learner
  LEARNER::base_learner* scorer;// a scoring function
  LEARNER::base_learner* cost_sensitive;// a cost sensitive learning algorithm.

  void learn(example*);

  // Function-pointer hook taking the shared data and a label; the target is
  // assigned elsewhere (not visible here).
  void (*set_minmax)(shared_data* sd, float label);

  size_t current_pass;

  uint32_t num_bits; // log_2 of the number of features.
  bool default_bits;

  string data_filename; // was vm["data"]

  bool daemon;
  size_t num_children;

  bool save_per_pass;
  float initial_weight;
  float initial_constant;

  bool bfgs;
  bool hessian_on;

  bool save_resume;
  version_struct model_file_ver;
  double normalized_sum_norm_x;
  bool vw_is_main;  // true if vw is executable; false in library mode

  // Command-line option descriptions, parsed values and raw arguments.
  po::options_description opts;
  po::options_description* new_opts;
  po::variables_map vm;
  std::stringstream* file_options;
  vector<std::string> args;

  // Type-erased pointer; the inline comment records the intended pointee type.
  void* /*Search::search*/ searchstr;

  uint32_t wpp;

  int stdout_fileno;

  std::string per_feature_regularizer_input;
  std::string per_feature_regularizer_output;
  std::string per_feature_regularizer_text;

  float l1_lambda; // the level of l_1 regularization to impose.
  float l2_lambda; // the level of l_2 regularization to impose.
  float power_t;// the power on learning rate decay.
  int reg_mode;

  size_t pass_length;
  size_t numpasses;
  size_t passes_complete;
  size_t parse_mask; // original note reads "1 << num_bits -1"; presumably the mask (1 << num_bits) - 1 -- TODO confirm at the assignment site
  bool permutations; // if true, permutations of features are generated instead of simple combinations. false by default
  v_array<v_string> interactions; // interactions of namespaces to cross.
  std::vector<std::string> pairs; // pairs of features to cross.
  std::vector<std::string> triples; // triples of features to cross.
  bool ignore_some;
  bool ignore[256];// a set of namespaces to ignore

  bool redefine_some;          // --redefine param was used
  unsigned char redefine[256]; // keeps new chars for namespaces

  std::vector<std::string> ngram_strings;
  std::vector<std::string> skip_strings;
  uint32_t ngram[256];// ngrams to generate.
  uint32_t skips[256];// skips in ngrams.
  std::vector<std::string> limit_strings; // descriptor of feature limits
  uint32_t limit[256];// count to limit features by
  uint32_t affix_features[256]; // affixes to generate (up to 8 per namespace)
  bool     spelling_features[256]; // generate spelling features for which namespace
  vector<string> dictionary_path;  // where to look for dictionaries
  vector<feature_dict*> namespace_dictionaries[256]; // each namespace has a list of dictionaries attached to it
  vector<dictionary_info> loaded_dictionaries; // which dictionaries have we loaded from a file to memory?

  bool multilabel_prediction;
  bool audit;// should I print lots of debugging information?
  bool quiet;// Should I suppress progress-printing of updates?
  bool training;// Should I train if label data is available?
  bool active;
  bool adaptive;// Should I use adaptive individual learning rates?
  bool normalized_updates; // Should every feature be normalized
  bool invariant_updates; // Should we use importance aware/safe updates
  size_t random_seed;
  bool random_weights;
  bool random_positive_weights; // for initialize_regressor w/ new_mf
  bool add_constant;
  bool nonormalize;
  bool do_reset_source;
  bool holdout_set_off;
  bool early_terminate;
  uint32_t holdout_period;
  uint32_t holdout_after;
  size_t check_holdout_every_n_passes;  // default: 1, but search might want to set it higher if you spend multiple passes learning a single policy

  size_t normalized_idx; // offset idx where the norm is stored (1 or 2 depending on whether adaptive is true)

  uint32_t lda;

  std::string text_regressor_name;
  std::string inv_hash_regressor_name;

  // Returns 2^num_bits, computed in size_t.
  size_t length () { return ((size_t)1) << num_bits; };

  // Array of functions, each taking a vw& and returning a base_learner*.
  v_array<LEARNER::base_learner* (*)(vw&)> reduction_stack;

  //Prediction output
  v_array<int> final_prediction_sink; // set to send global predictions to.
  int raw_prediction; // file descriptors for text output.

  // Output hooks held as function pointers; targets are assigned elsewhere.
  void (*print)(int,float,float,v_array<char>);
  void (*print_text)(int, string, v_array<char>);
  loss_function* loss;

  char* program_name;

  bool stdin_off;

  //runtime accounting variables.
  float initial_t;
  float eta;// learning rate control.
  float eta_decay_rate;
  time_t init_time;

  std::string final_regressor_name;
  regressor reg;

  size_t max_examples; // for TLC

  bool hash_inv;
  bool print_invert;

  // Set by --progress <arg>
  bool  progress_add;   // additive (rather than multiplicative) progress dumps
  float progress_arg;   // next update progress dump multiplier

  bool seeded; // whether the instance is sharing model state with others

  std::map< std::string, size_t> name_index_map;

  // Constructor; declared here, defined elsewhere.
  vw();
};

你可能感兴趣的:(源码)