This page shows the usage of the general binary classifier using the a9a dataset.
-- Train a logistic-regression classifier on a9a_train.
-- train_classifier emits (feature, weight) pairs per mapper; averaging
-- the weights per feature merges the partial models into the final one.
CREATE TABLE model AS
SELECT
    feature,
    AVG(weight) AS weight
FROM (
    SELECT
        train_classifier(
            add_bias(features),
            label,
            "-loss logistic -iter 30"
        ) AS (feature, weight)
    FROM a9a_train
) t
GROUP BY feature;
-- Score a9a_test against the trained model.
-- Step 1 (exploded): flatten each test instance into (rowid, feature, value) rows.
-- Step 2 (scored): join features to their learned weights and apply the
--   sigmoid to the weighted sum, yielding the positive-class probability.
-- Step 3: threshold the probability at 0.5 to get the predicted label.
CREATE TABLE predict AS
WITH exploded AS (
    -- One row per (rowid, feature) with the feature's numeric value.
    SELECT
        rowid,
        label,
        extract_feature(feature) AS feature,
        extract_weight(feature) AS value
    FROM a9a_test
    LATERAL VIEW explode(add_bias(features)) t AS feature
),
scored AS (
    -- COALESCE guards the edge case where none of an instance's features
    -- appear in the model: SUM over all-NULL weights is NULL, which would
    -- otherwise propagate NULL into prob and label. With the guard such
    -- rows score sigmoid(0) = 0.5.
    SELECT
        t.rowid,
        sigmoid(COALESCE(SUM(m.weight * t.value), 0.0)) AS prob
    FROM exploded t
    LEFT JOIN model m
        ON t.feature = m.feature
    GROUP BY t.rowid
)
SELECT
    rowid,
    prob,
    -- Predicted binary label: positive iff prob >= 0.5.
    (CASE WHEN prob >= 0.5 THEN 1.0 ELSE 0.0 END) AS label
FROM scored;
-- Pair each test instance's actual label with its predicted label
-- and probability.
CREATE OR REPLACE VIEW submit AS
SELECT
    t.label AS actual,
    p.label AS predicted,
    p.prob AS probability
FROM a9a_test t
JOIN predict p
    ON t.rowid = p.rowid;

-- Accuracy: fraction of test instances whose prediction matches the
-- actual label.
SELECT
    SUM(IF(actual == predicted, 1, 0)) / COUNT(1) AS accuracy
FROM submit;
The following table shows the accuracy obtained when varying the optimizer via the
`-loss logistic -opt XXXXXX -reg l1 -iter 30` option:
Optimizers that use momentum require careful tuning of the decay rate. In my experience, the default (AdaGrad+RDA), AdaDelta, Adam, and AdamHD are worth trying.