Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: [sparse_weights] get for predict #4651

Merged
merged 25 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions test/core.vwtest.json
Original file line number Diff line number Diff line change
Expand Up @@ -6021,7 +6021,7 @@
{
"id": 465,
"desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and saving model",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -f standard_sparse_model.vw",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -f standard_sparse_model.vw -q::",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there's a total of 3 tests using --sparse_weights, might be the opportunity to beef it up - maybe unit tests?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

apart from that, would be cool to add some sparse benchmarks to master and see if they are affected by this change

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

benchmarks added, tests can be added in a separate PR

"diff_files": {
"stderr": "train-sets/ref/sparse_save_check.stderr",
"stdout": "train-sets/ref/sparse_save_check.stdout"
Expand All @@ -6033,7 +6033,7 @@
{
"id": 466,
"desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and loading model",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -i standard_sparse_model.vw",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -i standard_sparse_model.vw -q::",
"diff_files": {
"stderr": "train-sets/ref/sparse_load_check.stderr",
"stdout": "train-sets/ref/sparse_load_check.stdout"
Expand All @@ -6045,5 +6045,33 @@
"depends_on": [
465
]
},
{
"id": 467,
"desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and saving model with random_weights",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -f standard_sparse_random_model.vw -q:: --random_weights",
"diff_files": {
"stderr": "train-sets/ref/sparse_save_check_random.stderr",
"stdout": "train-sets/ref/sparse_save_check_random.stdout"
},
"input_files": [
"train-sets/cb_test.ldf"
]
},
{
"id": 468,
"desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and loading model with random_weights",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -i standard_sparse_random_model.vw -q:: --random_weights",
"diff_files": {
"stderr": "train-sets/ref/sparse_load_check_random.stderr",
"stdout": "train-sets/ref/sparse_load_check_random.stdout"
},
"input_files": [
"train-sets/cb_test.ldf",
"standard_sparse_random_model.vw"
],
"depends_on": [
467
]
}
]
9 changes: 5 additions & 4 deletions test/train-sets/ref/sparse_load_check.stderr
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
creating quadratic features for pairs: ::
using no cache
Reading datafile = train-sets/cb_test.ldf
num sources = 1
Expand All @@ -6,17 +7,17 @@ learning rate = 0.5
initial_t = 3
power_t = 0.5
cb_type = mtr
Enabled learners: gd, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Input label = CB
Output pred = ACTION_PROBS
average since example example current current current
loss last counter weight label predict features
0.066667 0.066667 1 1.0 0:1:0.5 1:0.48 15
0.033333 0.000000 2 2.0 1:0:0.5 1:0.95 6
0.066667 0.066667 1 1.0 0:1:0.5 1:0.48 60
0.033333 0.000000 2 2.0 1:0:0.5 1:0.95 18

finished run
number of examples = 3
weighted example sum = 3.000000
weighted label sum = 0.000000
average loss = 0.033333
total feature number = 27
total feature number = 96
3 changes: 3 additions & 0 deletions test/train-sets/ref/sparse_load_check.stdout
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[warning] model file has set of {-q, --cubic, --interactions} settings stored, but they'll be OVERRIDDEN by set of {-q, --cubic, --interactions} settings from command line.
[warning] Any duplicate namespace interactions will be removed
You can use --leave_duplicate_interactions to disable this behaviour.
23 changes: 23 additions & 0 deletions test/train-sets/ref/sparse_load_check_random.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
creating quadratic features for pairs: ::
using no cache
Reading datafile = train-sets/cb_test.ldf
num sources = 1
Num weight bits = 18
learning rate = 0.5
initial_t = 3
power_t = 0.5
cb_type = mtr
Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Input label = CB
Output pred = ACTION_PROBS
average since example example current current current
loss last counter weight label predict features
0.066667 0.066667 1 1.0 0:1:0.5 1:0.93 60
0.033333 0.000000 2 2.0 1:0:0.5 0:0.95 18

finished run
number of examples = 3
weighted example sum = 3.000000
weighted label sum = 0.000000
average loss = 0.033333
total feature number = 96
3 changes: 3 additions & 0 deletions test/train-sets/ref/sparse_load_check_random.stdout
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[warning] model file has set of {-q, --cubic, --interactions} settings stored, but they'll be OVERRIDDEN by set of {-q, --cubic, --interactions} settings from command line.
[warning] Any duplicate namespace interactions will be removed
You can use --leave_duplicate_interactions to disable this behaviour.
9 changes: 5 additions & 4 deletions test/train-sets/ref/sparse_save_check.stderr
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
creating quadratic features for pairs: ::
final_regressor = standard_sparse_model.vw
using no cache
Reading datafile = train-sets/cb_test.ldf
Expand All @@ -7,17 +8,17 @@ learning rate = 0.5
initial_t = 0
power_t = 0.5
cb_type = mtr
Enabled learners: gd, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Input label = CB
Output pred = ACTION_PROBS
average since example example current current current
loss last counter weight label predict features
0.666667 0.666667 1 1.0 0:1:0.5 0:0.33 15
0.333333 0.000000 2 2.0 1:0:0.5 1:0.95 6
0.666667 0.666667 1 1.0 0:1:0.5 0:0.33 60
0.333333 0.000000 2 2.0 1:0:0.5 1:0.95 18

finished run
number of examples = 3
weighted example sum = 3.000000
weighted label sum = 0.000000
average loss = 0.333333
total feature number = 27
total feature number = 96
2 changes: 2 additions & 0 deletions test/train-sets/ref/sparse_save_check.stdout
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[warning] Any duplicate namespace interactions will be removed
You can use --leave_duplicate_interactions to disable this behaviour.
24 changes: 24 additions & 0 deletions test/train-sets/ref/sparse_save_check_random.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
creating quadratic features for pairs: ::
final_regressor = standard_sparse_random_model.vw
using no cache
Reading datafile = train-sets/cb_test.ldf
num sources = 1
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
cb_type = mtr
Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Input label = CB
Output pred = ACTION_PROBS
average since example example current current current
loss last counter weight label predict features
0.066667 0.066667 1 1.0 0:1:0.5 1:0.93 60
0.033333 0.000000 2 2.0 1:0:0.5 0:0.95 18

finished run
number of examples = 3
weighted example sum = 3.000000
weighted label sum = 0.000000
average loss = 0.033333
total feature number = 96
2 changes: 2 additions & 0 deletions test/train-sets/ref/sparse_save_check_random.stdout
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[warning] Any duplicate namespace interactions will be removed
You can use --leave_duplicate_interactions to disable this behaviour.
6 changes: 6 additions & 0 deletions vowpalwabbit/core/include/vw/core/array_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ class parameters
else { return dense_weights[i]; }
}

// Look up the weight at index i through the active backing store's get()
// accessor (the read-oriented counterpart of operator[]; see
// sparse_parameters::get() for the sparse-store semantics).
inline VW::weight& get(size_t i) { return sparse ? sparse_weights.get(i) : dense_weights.get(i); }

template <typename Lambda>
void set_default(Lambda&& default_func)
{
Expand Down
4 changes: 4 additions & 0 deletions vowpalwabbit/core/include/vw/core/array_parameters_dense.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ class dense_parameters
inline const VW::weight& operator[](size_t i) const { return _begin.get()[i & _weight_mask]; }
inline VW::weight& operator[](size_t i) { return _begin.get()[i & _weight_mask]; }

// get() exists so dense and sparse parameter classes expose the same
// interface; for dense weights it is identical to operator[] (masked
// direct indexing into the contiguous array).
inline const VW::weight& get(size_t i) const { return (*this)[i]; }
inline VW::weight& get(size_t i) { return (*this)[i]; }

VW_ATTR(nodiscard) static dense_parameters shallow_copy(const dense_parameters& input);
VW_ATTR(nodiscard) static dense_parameters deep_copy(const dense_parameters& input);

Expand Down
7 changes: 6 additions & 1 deletion vowpalwabbit/core/include/vw/core/array_parameters_sparse.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,14 @@ class sparse_parameters
const_iterator cbegin() const { return const_iterator(_map.begin()); }
const_iterator cend() const { return const_iterator(_map.end()); }

// operator[] finds the weight in _map, first inserting a default-initialized
// entry if the index is not present. Does alter _map.
inline VW::weight& operator[](size_t i) { return *(get_or_default_and_get(i)); }

inline const VW::weight& operator[](size_t i) const { return *(get_or_default_and_get(i)); }

// get() finds the weight in _map and returns a shared default value if it is
// not found. NOTE(review): when a default function has been registered via
// set_default(), get_impl DOES insert and initialize the missing entry — so
// "does not alter _map" only holds when no default function is set.
inline VW::weight& get(size_t i) { return *(get_impl(i)); };
inline const VW::weight& get(size_t i) const { return *(get_impl(i)); };

inline VW::weight& strided_index(size_t index) { return operator[](index << _stride_shift); }
inline const VW::weight& strided_index(size_t index) const { return operator[](index << _stride_shift); }

Expand Down Expand Up @@ -119,6 +123,7 @@ class sparse_parameters
// It is marked const so it can be used from both const and non const operator[]
// The map itself is mutable to facilitate this
VW::weight* get_or_default_and_get(size_t i) const;
VW::weight* get_impl(size_t i) const;
};
} // namespace VW
using sparse_parameters VW_DEPRECATED("sparse_parameters moved into VW namespace") = VW::sparse_parameters;
4 changes: 2 additions & 2 deletions vowpalwabbit/core/include/vw/core/gd_predict.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ inline void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& da
{
for (const auto& f : fs)
{
VW::weight& w = weights[(f.index() + offset)];
VW::weight& w = weights[f.index() + offset];
FuncT(dat, mult * f.value(), w);
}
}
Expand All @@ -46,7 +46,7 @@ template <class DataT, void (*FuncT)(DataT&, float, float), class WeightsT>
inline void foreach_feature(
const WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.)
{
for (const auto& f : fs) { FuncT(dat, mult * f.value(), weights[static_cast<size_t>(f.index() + offset)]); }
for (const auto& f : fs) { FuncT(dat, mult * f.value(), weights.get(static_cast<size_t>(f.index() + offset))); }
}

template <class DataT, class WeightOrIndexT, void (*FuncT)(DataT&, float, WeightOrIndexT),
Expand Down
2 changes: 1 addition & 1 deletion vowpalwabbit/core/include/vw/core/interactions_predict.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ inline void call_func_t(DataT& dat, WeightsT& weights, const float ft_value, con
// Apply FuncT to the feature value and its weight. This const-weights
// overload looks the weight up via get(), the read-oriented accessor used on
// the predict path. (Diff residue duplicated the call with the pre-change
// operator[] form; only the get() form is kept.)
template <class DataT, void (*FuncT)(DataT&, const float, float), class WeightsT>
inline void call_func_t(DataT& dat, const WeightsT& weights, const float ft_value, const uint64_t ft_idx)
{
  FuncT(dat, ft_value, weights.get(static_cast<size_t>(ft_idx)));
}

template <class DataT, void (*FuncT)(DataT&, float, uint64_t), class WeightsT>
Expand Down
26 changes: 26 additions & 0 deletions vowpalwabbit/core/src/array_parameters_sparse.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,32 @@ VW::weight* VW::sparse_parameters::get_or_default_and_get(size_t i) const
return iter->second.get();
}

VW::weight* VW::sparse_parameters::get_impl(size_t i) const
{
  // Shared, zero-initialized fallback block returned when the index is absent
  // and no default initializer is registered. NOTE(review): this static is
  // shared across all instances and handed out as mutable storage — callers
  // appear expected to treat a miss as read-only; confirm no caller writes
  // through get() on a miss.
  static auto default_value =
      std::shared_ptr<VW::weight>(VW::details::calloc_mergable_or_throw<VW::weight>(stride()), free);

  const uint64_t index = i & _weight_mask;
  auto iter = _map.find(index);
  if (iter == _map.end())
  {
    if (_default_func != nullptr)
    {
      // A default initializer exists: materialize the entry so it runs exactly
      // once per index. Memory allocated by calloc must be freed by C free().
      // Reuse the iterator returned by insert() instead of a second find().
      iter = _map
                 .insert(std::make_pair(index,
                     std::shared_ptr<VW::weight>(VW::details::calloc_mergable_or_throw<VW::weight>(stride()), free)))
                 .first;
      _default_func(iter->second.get(), index);
      return iter->second.get();
    }
    // No default initializer: report the shared zero value without mutating
    // the map.
    return default_value.get();
  }

  // Entry already present in the map.
  return iter->second.get();
}

VW::sparse_parameters::sparse_parameters(size_t length, uint32_t stride_shift)
: _weight_mask((length << stride_shift) - 1), _stride_shift(stride_shift), _default_func(nullptr)
{
Expand Down
15 changes: 6 additions & 9 deletions vowpalwabbit/core/src/parse_regressor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,23 +94,20 @@ void initialize_regressor(VW::workspace& all, T& weights)
}
else if (all.initial_weights_config.initial_weight != 0.)
{
auto initial_weight = all.initial_weights_config.initial_weight;
auto initial_value_weight_initializer = [initial_weight](VW::weight* weights, uint64_t /*index*/)
{ weights[0] = initial_weight; };
auto initial_value_weight_initializer = [&all](VW::weight* weights, uint64_t /*index*/)
{ weights[0] = all.initial_weights_config.initial_weight; };
weights.set_default(initial_value_weight_initializer);
}
else if (all.initial_weights_config.random_positive_weights)
{
auto rand_state = *all.get_random_state();
auto random_positive = [&rand_state](VW::weight* weights, uint64_t)
{ weights[0] = 0.1f * rand_state.get_and_update_random(); };
auto random_positive = [&all](VW::weight* weights, uint64_t)
{ weights[0] = 0.1f * all.get_random_state()->get_and_update_random(); };
weights.set_default(random_positive);
}
else if (all.initial_weights_config.random_weights)
{
auto rand_state = *all.get_random_state();
auto random_neg_pos = [&rand_state](VW::weight* weights, uint64_t)
{ weights[0] = rand_state.get_and_update_random() - 0.5f; };
auto random_neg_pos = [&all](VW::weight* weights, uint64_t)
{ weights[0] = all.get_random_state()->get_and_update_random() - 0.5f; };
weights.set_default(random_neg_pos);
}
else if (all.initial_weights_config.normal_weights) { weights.set_default(&initialize_weights_as_polar_normal); }
Expand Down
2 changes: 2 additions & 0 deletions vowpalwabbit/core/src/reductions/cb/cb_explore_adf_rnd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ class lazy_gaussian
{
public:
inline float operator[](uint64_t index) const { return VW::details::merand48_boxmuller(index); }
// get() is only needed for sparse_weights, same as operator[] for lazy_gaussian
inline float get(uint64_t index) const { return operator[](index); }
};

inline void vec_add_with_norm(std::pair<float, float>& p, float fx, float fw)
Expand Down
Loading