Skip to content

Commit 5389ce2

Browse files
committed
ota: Better usage of App rollback feature
- Mark new app firmware as valid only if MQTT connection is successful. - Report OTA success to RainMaker backend only after successful connection after booting into new firmware. - If the new firmware cannot connect to MQTT within the specified time (default 90 seconds, configurable), it will roll back into older firmware. Even if the timer logic fails for some reason, a reboot will also trigger the rollback.
1 parent 2f0f5e0 commit 5389ce2

File tree

5 files changed

+170
-25
lines changed

5 files changed

+170
-25
lines changed

Diff for: components/esp_rainmaker/Kconfig.projbuild

+10
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,16 @@ menu "ESP RainMaker Config"
199199
However, please ensure that your application has enough memory headroom to allow this,
200200
else, the OTA may fail.
201201

202+
config ESP_RMAKER_OTA_ROLLBACK_WAIT_PERIOD
203+
int "OTA Rollback Wait Period (Seconds)"
204+
default 90
205+
range 30 600
206+
help
207+
After an OTA Update, if CONFIG_BOOTLOADER_APP_ROLLBACK_ENABLE is set, then the firmware will wait for MQTT
208+
connection to mark the new firmware as valid. However, if it is not able to do so within
209+
this wait period (in seconds), the firmware will be marked as invalid and the older
210+
firmware will be booted into.
211+
202212
endmenu
203213

204214
menu "ESP RainMaker Scheduling"

Diff for: components/esp_rainmaker/src/ota/esp_rmaker_ota.c

+118-21
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,24 @@
1414

1515
#include <string.h>
1616
#include <freertos/FreeRTOS.h>
17+
#include <freertos/timers.h>
1718
#include <freertos/task.h>
19+
#include <esp_event.h>
1820
#include <esp_log.h>
1921
#include <esp_ota_ops.h>
2022
#include <esp_partition.h>
2123
#include <esp_https_ota.h>
2224
#include <esp_wifi_types.h>
2325
#include <esp_wifi.h>
26+
#include <nvs.h>
2427
#if CONFIG_BT_ENABLED
2528
#include <esp_bt.h>
2629
#endif /* CONFIG_BT_ENABLED */
2730

2831
#include <esp_rmaker_utils.h>
32+
#include <esp_rmaker_common_events.h>
33+
34+
#include "esp_rmaker_internal.h"
2935
#include "esp_rmaker_ota_internal.h"
3036

3137
#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(4, 4, 0)
@@ -44,11 +50,12 @@
4450

4551
#endif /* !IDF4.4 */
4652
static const char *TAG = "esp_rmaker_ota";
53+
static TimerHandle_t s_ota_rollback_timer;
4754

4855
#define OTA_REBOOT_TIMER_SEC 10
4956
#define DEF_HTTP_TX_BUFFER_SIZE 1024
5057
#define DEF_HTTP_RX_BUFFER_SIZE CONFIG_ESP_RMAKER_OTA_HTTP_RX_BUFFER_SIZE
51-
58+
#define RMAKER_OTA_ROLLBACK_WAIT_PERIOD CONFIG_ESP_RMAKER_OTA_ROLLBACK_WAIT_PERIOD
5259
extern const char esp_rmaker_ota_def_cert[] asm("_binary_rmaker_ota_server_crt_start");
5360
const char *ESP_RMAKER_OTA_DEFAULT_SERVER_CERT = esp_rmaker_ota_def_cert;
5461
char *esp_rmaker_ota_status_to_string(ota_status_t status)
@@ -71,6 +78,8 @@ char *esp_rmaker_ota_status_to_string(ota_status_t status)
7178
}
7279
esp_err_t esp_rmaker_ota_report_status(esp_rmaker_ota_handle_t ota_handle, ota_status_t status, char *additional_info)
7380
{
81+
ESP_LOGI(TAG, "Reporting %s: %s", esp_rmaker_ota_status_to_string(status), additional_info);
82+
7483
if (!ota_handle) {
7584
return ESP_FAIL;
7685
}
@@ -267,7 +276,19 @@ esp_err_t esp_rmaker_ota_default_cb(esp_rmaker_ota_handle_t ota_handle, esp_rmak
267276
ota_finish_err = esp_https_ota_finish(https_ota_handle);
268277
if ((err == ESP_OK) && (ota_finish_err == ESP_OK)) {
269278
ESP_LOGI(TAG, "OTA upgrade successful. Rebooting in %d seconds...", OTA_REBOOT_TIMER_SEC);
279+
#ifdef CONFIG_BOOTLOADER_APP_ROLLBACK_ENABLE
280+
nvs_handle handle;
281+
esp_err_t err = nvs_open_from_partition(ESP_RMAKER_NVS_PART_NAME, RMAKER_OTA_NVS_NAMESPACE, NVS_READWRITE, &handle);
282+
if (err == ESP_OK) {
283+
uint8_t ota_update = 1;
284+
nvs_set_blob(handle, RMAKER_OTA_UPDATE_FLAG_NVS_NAME, &ota_update, sizeof(ota_update));
285+
nvs_close(handle);
286+
}
287+
/* Success will be reported after a reboot since Rollback is enabled */
288+
esp_rmaker_ota_report_status(ota_handle, OTA_STATUS_IN_PROGRESS, "Rebooting into new firmware");
289+
#else
270290
esp_rmaker_ota_report_status(ota_handle, OTA_STATUS_SUCCESS, "OTA Upgrade finished successfully");
291+
#endif
271292
esp_rmaker_reboot(OTA_REBOOT_TIMER_SEC);
272293
return ESP_OK;
273294
} else {
@@ -284,6 +305,101 @@ esp_err_t esp_rmaker_ota_default_cb(esp_rmaker_ota_handle_t ota_handle, esp_rmak
284305
return ESP_FAIL;
285306
}
286307

308+
309+
/*
 * Handler for RMAKER_MQTT_EVENT_CONNECTED, registered by esp_ota_check_for_mqtt()
 * on the first boot after an OTA. `arg` is the esp_rmaker_ota_t passed at
 * registration time; the remaining event arguments are not used.
 *
 * A successful MQTT connection is treated as proof that the new firmware works:
 * the OTA is reported as successful, the running image is marked valid (which
 * cancels the pending rollback) and the rollback timer is torn down.
 */
static void event_handler(void* arg, esp_event_base_t event_base,
                          int32_t event_id, void* event_data)
{
    esp_rmaker_ota_t *ota = (esp_rmaker_ota_t *)arg;

    /* Only the first connection matters, so unregister right away */
    esp_event_handler_unregister(RMAKER_COMMON_EVENT, RMAKER_MQTT_EVENT_CONNECTED, &event_handler);
    esp_rmaker_ota_report_status((esp_rmaker_ota_handle_t )ota, OTA_STATUS_SUCCESS, "OTA Upgrade finished and verified successfully");

    if (esp_ota_mark_app_valid_cancel_rollback() == ESP_OK) {
        /* The OTA update flag stored in NVS before rebooting into this firmware is what
         * later boots use to detect a rollback. Now that the image is confirmed, clear it
         * so that a subsequent normal reboot is not misreported as "Firmware rolled back".
         */
        nvs_handle handle;
        if (nvs_open_from_partition(ESP_RMAKER_NVS_PART_NAME, RMAKER_OTA_NVS_NAMESPACE, NVS_READWRITE, &handle) == ESP_OK) {
            nvs_erase_key(handle, RMAKER_OTA_UPDATE_FLAG_NVS_NAME);
            nvs_commit(handle);
            nvs_close(handle);
        }
    } else {
        /* Keep the flag in this case: if the bootloader rolls back on the next boot,
         * the older firmware still needs it to report the rollback.
         */
        ESP_LOGE(TAG, "Failed to mark the new firmware as valid");
    }

    ota->ota_in_progress = false;
    if (s_ota_rollback_timer) {
        xTimerStop(s_ota_rollback_timer, portMAX_DELAY);
        xTimerDelete(s_ota_rollback_timer, portMAX_DELAY);
        s_ota_rollback_timer = NULL;
    }
    /* With the previous OTA closed out, look for any newer OTA job */
    if (ota->type == OTA_USING_TOPICS) {
        esp_rmaker_ota_fetch();
    }
}
326+
327+
/*
 * Callback of the rollback timer created in esp_ota_check_for_mqtt().
 * Reaching this point means the new firmware was not verified within
 * RMAKER_OTA_ROLLBACK_WAIT_PERIOD seconds, so the running image is marked
 * invalid and a rollback with reboot is requested.
 */
static void esp_ota_rollback(TimerHandle_t handle)
{
    (void)handle; /* the timer handle itself is not needed here */

    ESP_LOGE(TAG,
             "Could not verify firmware even after %d seconds since boot-up. Rolling back.",
             RMAKER_OTA_ROLLBACK_WAIT_PERIOD);

    esp_ota_mark_app_invalid_rollback_and_reboot();
}
333+
334+
/*
 * Arm the firmware verification logic used on the first boot after an OTA.
 *
 * Creates and starts a timer of RMAKER_OTA_ROLLBACK_WAIT_PERIOD seconds whose
 * callback (esp_ota_rollback) rolls the firmware back, and registers
 * event_handler for RMAKER_MQTT_EVENT_CONNECTED with `ota` as its argument.
 *
 * A failure to create or start the timer is only logged; the event handler is
 * still registered so that the firmware can be verified on MQTT connection.
 *
 * Returns the result of esp_event_handler_register().
 */
static esp_err_t esp_ota_check_for_mqtt(esp_rmaker_ota_t *ota)
{
    s_ota_rollback_timer = xTimerCreate("ota_rollback_tm", (RMAKER_OTA_ROLLBACK_WAIT_PERIOD * 1000) / portTICK_PERIOD_MS,
                            pdTRUE, NULL, esp_ota_rollback);
    if (!s_ota_rollback_timer) {
        ESP_LOGW(TAG, "Could not create rollback timer. Will require manual reboot if firmware verification fails");
    } else if (xTimerStart(s_ota_rollback_timer, 0) != pdPASS) {
        /* With a zero block time the start command can be rejected if the timer command
         * queue is full. The handle is kept so that event_handler can still delete it.
         */
        ESP_LOGW(TAG, "Could not start rollback timer. Will require manual reboot if firmware verification fails");
    }

    return esp_event_handler_register(RMAKER_COMMON_EVENT, RMAKER_MQTT_EVENT_CONNECTED, &event_handler, ota);
}
346+
347+
/*
 * Handle the OTA image state of the running partition at start-up.
 *
 * - If the image is pending verification (first boot after an OTA), run the
 *   optional diagnostics callback from `ota_config`. On success, flag the OTA as
 *   still in progress and wait for an MQTT connection before marking the image
 *   valid; on failure, mark the image invalid and roll back immediately.
 * - Otherwise, when CONFIG_BOOTLOADER_APP_ROLLBACK_ENABLE is set, a leftover OTA
 *   update flag in NVS is taken to mean that the firmware was rolled back. The
 *   flag is erased, `ota->rolled_back` is set, and for OTA_USING_PARAMS the
 *   rejection is reported right away.
 *
 * Nothing is done if the state of the running partition cannot be read.
 */
static void esp_rmaker_ota_manage_rollback(esp_rmaker_ota_config_t *ota_config, esp_rmaker_ota_t *ota)
{
    const esp_partition_t *running = esp_ota_get_running_partition();
    esp_ota_img_states_t ota_state;
    if (esp_ota_get_state_partition(running, &ota_state) != ESP_OK) {
        return;
    }
    ESP_LOGI(TAG, "OTA state = %d", ota_state);

    /* Not checking for CONFIG_BOOTLOADER_APP_ROLLBACK_ENABLE here because the firmware may have
     * it disabled, but bootloader may have it enabled, in which case, we will have to
     * handle this state.
     */
    if (ota_state == ESP_OTA_IMG_PENDING_VERIFY) {
        ESP_LOGI(TAG, "First Boot after an OTA");
        /* Run diagnostic function, if the application supplied one */
        bool diagnostic_is_ok = true;
        if (ota_config->ota_diag) {
            diagnostic_is_ok = ota_config->ota_diag();
        }
        if (!diagnostic_is_ok) {
            ESP_LOGE(TAG, "Diagnostics failed! Start rollback to the previous version ...");
            esp_ota_mark_app_invalid_rollback_and_reboot();
            return;
        }
        ESP_LOGI(TAG, "Diagnostics completed successfully! Continuing execution ...");
        /* Will not mark the image valid here immediately, but instead will wait for
         * MQTT connection. The below flag will tell the OTA functions that the earlier
         * OTA is still in progress.
         */
        ota->ota_in_progress = true;
        esp_ota_check_for_mqtt(ota);
        return;
    }

#ifdef CONFIG_BOOTLOADER_APP_ROLLBACK_ENABLE
    /* If rollback is enabled, and the ota update flag is found, it means that the firmware was rolled back
     */
    nvs_handle handle;
    if (nvs_open_from_partition(ESP_RMAKER_NVS_PART_NAME, RMAKER_OTA_NVS_NAMESPACE, NVS_READWRITE, &handle) != ESP_OK) {
        return;
    }
    uint8_t ota_update = 0;
    size_t len = sizeof(ota_update);
    if (nvs_get_blob(handle, RMAKER_OTA_UPDATE_FLAG_NVS_NAME, &ota_update, &len) == ESP_OK) {
        ota->rolled_back = true;
        nvs_erase_key(handle, RMAKER_OTA_UPDATE_FLAG_NVS_NAME);
        /* Commit so that the erase is persisted and the rollback is not reported again
         * on the next boot.
         */
        nvs_commit(handle);
        if (ota->type == OTA_USING_PARAMS) {
            /* Calling this only for OTA_USING_PARAMS, because for OTA_USING_TOPICS,
             * the work queue function will manage the status reporting later.
             */
            esp_rmaker_ota_report_status((esp_rmaker_ota_handle_t )ota,
                    OTA_STATUS_REJECTED, "Firmware rolled back");
        }
    }
    nvs_close(handle);
#endif
}
402+
287403
/* Default OTA configuration: only server_cert is set, pointing at the embedded
 * certificate symbol esp_rmaker_ota_def_cert; all other fields are zero-initialised.
 * NOTE(review): presumably used when the caller supplies no configuration - confirm
 * against esp_rmaker_ota_enable().
 */
static const esp_rmaker_ota_config_t ota_default_config = {
288404
.server_cert = esp_rmaker_ota_def_cert,
289405
};
@@ -307,26 +423,6 @@ esp_err_t esp_rmaker_ota_enable(esp_rmaker_ota_config_t *ota_config, esp_rmaker_
307423
ESP_LOGE(TAG, "Failed to allocate memory for esp_rmaker_ota_t");
308424
return ESP_ERR_NO_MEM;
309425
}
310-
const esp_partition_t *running = esp_ota_get_running_partition();
311-
esp_ota_img_states_t ota_state;
312-
if (esp_ota_get_state_partition(running, &ota_state) == ESP_OK) {
313-
ESP_LOGI(TAG, "OTA state = %d", ota_state);
314-
if (ota_state == ESP_OTA_IMG_PENDING_VERIFY) {
315-
ESP_LOGI(TAG, "First Boot after an OTA");
316-
/* Run diagnostic function */
317-
bool diagnostic_is_ok = true;
318-
if (ota_config->ota_diag) {
319-
diagnostic_is_ok = ota_config->ota_diag();
320-
}
321-
if (diagnostic_is_ok) {
322-
ESP_LOGI(TAG, "Diagnostics completed successfully! Continuing execution ...");
323-
esp_ota_mark_app_valid_cancel_rollback();
324-
} else {
325-
ESP_LOGE(TAG, "Diagnostics failed! Start rollback to the previous version ...");
326-
esp_ota_mark_app_invalid_rollback_and_reboot();
327-
}
328-
}
329-
}
330426
if (ota_config->ota_cb) {
331427
ota->ota_cb = ota_config->ota_cb;
332428
} else {
@@ -342,6 +438,7 @@ esp_err_t esp_rmaker_ota_enable(esp_rmaker_ota_config_t *ota_config, esp_rmaker_
342438
err = esp_rmaker_ota_enable_using_topics(ota);
343439
}
344440
if (err == ESP_OK) {
441+
esp_rmaker_ota_manage_rollback(ota_config, ota);
345442
ota_init_done = true;
346443
} else {
347444
free(ota);

Diff for: components/esp_rainmaker/src/ota/esp_rmaker_ota_internal.h

+6
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717
#include <stdint.h>
1818
#include <esp_err.h>
1919
#include <esp_rmaker_ota.h>
20+
21+
#define RMAKER_OTA_NVS_NAMESPACE "rmaker_ota"
22+
#define RMAKER_OTA_JOB_ID_NVS_NAME "rmaker_ota_id"
23+
#define RMAKER_OTA_UPDATE_FLAG_NVS_NAME "ota_update"
24+
2025
typedef struct {
2126
esp_rmaker_ota_type_t type;
2227
esp_rmaker_ota_cb_t ota_cb;
@@ -25,6 +30,7 @@ typedef struct {
2530
char *url;
2631
int filesize;
2732
bool ota_in_progress;
33+
bool rolled_back;
2834
ota_status_t last_reported_status;
2935
void *transient_priv;
3036
} esp_rmaker_ota_t;

Diff for: components/esp_rainmaker/src/ota/esp_rmaker_ota_using_params.c

+2-3
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,10 @@ static esp_err_t esp_rmaker_ota_service_cb(const esp_rmaker_device_t *device, co
7979

8080
esp_err_t esp_rmaker_ota_report_status_using_params(esp_rmaker_ota_handle_t ota_handle, ota_status_t status, char *additional_info)
8181
{
82-
if (!ota_handle) {
82+
const esp_rmaker_device_t *device = esp_rmaker_node_get_device_by_name(esp_rmaker_get_node(), ESP_RMAKER_OTA_SERV_NAME);
83+
if (!device) {
8384
return ESP_FAIL;
8485
}
85-
esp_rmaker_ota_t *ota = (esp_rmaker_ota_t *)ota_handle;
86-
const esp_rmaker_device_t *device = (esp_rmaker_device_t *)ota->transient_priv;
8786
esp_rmaker_param_t *info_param = esp_rmaker_device_get_param_by_type(device, ESP_RMAKER_PARAM_OTA_INFO);
8887
esp_rmaker_param_t *status_param = esp_rmaker_device_get_param_by_type(device, ESP_RMAKER_PARAM_OTA_STATUS);
8988

Diff for: components/esp_rainmaker/src/ota/esp_rmaker_ota_using_topics.c

+34-1
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717
#include <json_generator.h>
1818
#include <esp_log.h>
1919
#include <esp_system.h>
20+
#include <nvs.h>
2021
#include <esp_rmaker_work_queue.h>
2122
#include <esp_rmaker_core.h>
2223
#include <esp_rmaker_ota.h>
2324

25+
#include "esp_rmaker_internal.h"
2426
#include "esp_rmaker_ota_internal.h"
2527
#include "esp_rmaker_mqtt.h"
2628

@@ -53,13 +55,29 @@ esp_err_t esp_rmaker_ota_report_status_using_topics(esp_rmaker_ota_handle_t ota_
5355
json_gen_start_object(&jstr);
5456
if (ota->transient_priv) {
5557
json_gen_obj_set_string(&jstr, "ota_job_id", (char *)ota->transient_priv);
58+
} else {
59+
/* This will get executed only when the OTA status is being reported after a reboot, either to
60+
* indicate successful verification of new firmware, or to indicate that firmware was rolled back
61+
*/
62+
nvs_handle handle;
63+
esp_err_t err = nvs_open_from_partition(ESP_RMAKER_NVS_PART_NAME, RMAKER_OTA_NVS_NAMESPACE, NVS_READWRITE, &handle);
64+
if (err == ESP_OK) {
65+
char job_id[64] = {0};
66+
size_t len = sizeof(job_id);
67+
if ((err = nvs_get_blob(handle, RMAKER_OTA_JOB_ID_NVS_NAME, job_id, &len)) == ESP_OK) {
68+
json_gen_obj_set_string(&jstr, "ota_job_id", job_id);
69+
nvs_erase_key(handle, RMAKER_OTA_JOB_ID_NVS_NAME);
70+
}
71+
nvs_close(handle);
72+
}
5673
}
5774
json_gen_obj_set_string(&jstr, "status", esp_rmaker_ota_status_to_string(status));
5875
json_gen_obj_set_string(&jstr, "additional_info", additional_info);
5976
json_gen_end_object(&jstr);
6077
json_gen_str_end(&jstr);
6178

6279
char publish_topic[100];
80+
ESP_LOGI(TAG, "%s",publish_payload);
6381
snprintf(publish_topic, sizeof(publish_topic), "node/%s/%s", node_id, OTASTATUS_TOPIC_SUFFIX);
6482
esp_err_t err = esp_rmaker_mqtt_publish(publish_topic, publish_payload, strlen(publish_payload),
6583
RMAKER_MQTT_QOS1, NULL);
@@ -129,6 +147,12 @@ static void ota_url_handler(const char *topic, void *payload, size_t payload_len
129147
goto end;
130148
}
131149
json_obj_get_string(&jctx, "ota_job_id", ota_job_id, len);
150+
nvs_handle handle;
151+
esp_err_t err = nvs_open_from_partition(ESP_RMAKER_NVS_PART_NAME, RMAKER_OTA_NVS_NAMESPACE, NVS_READWRITE, &handle);
152+
if (err == ESP_OK) {
153+
nvs_set_blob(handle, RMAKER_OTA_JOB_ID_NVS_NAME, ota_job_id, strlen(ota_job_id));
154+
nvs_close(handle);
155+
}
132156
ESP_LOGI(TAG, "OTA Job ID: %s", ota_job_id);
133157
ota->transient_priv = ota_job_id;
134158
len = 0;
@@ -171,6 +195,7 @@ static void ota_url_handler(const char *topic, void *payload, size_t payload_len
171195

172196
esp_err_t esp_rmaker_ota_fetch(void)
173197
{
198+
ESP_LOGI(TAG, "Fetching OTA details, if any.");
174199
esp_rmaker_node_info_t *info = esp_rmaker_node_get_info(esp_rmaker_get_node());
175200
if (!info) {
176201
ESP_LOGE(TAG, "Node info not found. Cant send otafetch request");
@@ -217,9 +242,17 @@ static esp_err_t esp_rmaker_ota_subscribe(void *priv_data)
217242

218243
static void esp_rmaker_ota_work_fn(void *priv_data)
219244
{
245+
esp_rmaker_ota_t *ota = (esp_rmaker_ota_t *)priv_data;
246+
/* If the firmware was rolled back, indicate that first */
247+
if (ota->rolled_back) {
248+
esp_rmaker_ota_report_status((esp_rmaker_ota_handle_t )ota, OTA_STATUS_REJECTED, "Firmware rolled back");
249+
ota->rolled_back = false;
250+
}
220251
esp_rmaker_ota_subscribe(priv_data);
221252
#ifdef CONFIG_ESP_RMAKER_OTA_AUTOFETCH
222-
esp_rmaker_ota_fetch();
253+
if (ota->ota_in_progress != true) {
254+
esp_rmaker_ota_fetch();
255+
}
223256
if (ota_autofetch_period > 0) {
224257
esp_timer_create_args_t autofetch_timer_conf = {
225258
.callback = esp_rmaker_ota_timer_cb_fetch,

0 commit comments

Comments
 (0)