diff --git a/src/layer/hardsigmoid.cpp b/src/layer/hardsigmoid.cpp index fe02f30f8..060b08e63 100644 --- a/src/layer/hardsigmoid.cpp +++ b/src/layer/hardsigmoid.cpp @@ -24,6 +24,8 @@ HardSigmoid::HardSigmoid() int HardSigmoid::load_param(const ParamDict& pd) { + // TensorFlow uses alpha, beta = 0.2, 0.5 + // PyTorch uses alpha, beta = 1/6, 0.5 alpha = pd.get(0, 0.2f); beta = pd.get(1, 0.5f); lower = -beta / alpha; diff --git a/src/layer/hardswish.cpp b/src/layer/hardswish.cpp index fb064f43a..2bcc17d82 100644 --- a/src/layer/hardswish.cpp +++ b/src/layer/hardswish.cpp @@ -24,6 +24,8 @@ HardSwish::HardSwish() int HardSwish::load_param(const ParamDict& pd) { + // Note that TensorFlow/PyTorch use alpha, beta = 1/6, 0.5, not the default values here. + // You can set them manually in the .param file. alpha = pd.get(0, 0.2f); beta = pd.get(1, 0.5f); lower = -beta / alpha;