diff mbox series

[BlueZ] gatt-client: Do not continue service discovery if disconnected

Message ID 20201105002412.37720-1-sonnysasaka@chromium.org
State New
Headers show
Series [BlueZ] gatt-client: Do not continue service discovery if disconnected | expand

Commit Message

Sonny Sasaka Nov. 5, 2020, 12:24 a.m. UTC
If device is disconnected, discovery_complete_op should not continue
processing since this can lead to a crash because the GATT-related
objects may already be freed.

Sample crash stack trace:
0  gatt_db_service_get_handles (service=0x1751130, service=0x1751130, end_handle=0x7ffcd600806e, start_handle=0x7ffcd600806c) at src/shared/gatt-db.c:569
1  gatt_db_attribute_get_service_data (attrib=<optimized out>, start_handle=0x7ffcd600806c, end_handle=0x7ffcd600806e, primary=0x0, uuid=0x0) at src/shared/gatt-db.c:1657
2  0x00000000004983a8 in discovery_op_complete (op=op@entry=0x173b320, success=<optimized out>, err=err@entry=10 '\n') at src/shared/gatt-client.c:406
3  0x000000000049a548 in discover_descs_cb (success=<optimized out>, att_ecode=<optimized out>, result=<optimized out>, user_data=0x173b320) at src/shared/gatt-client.c:915
4  0x00000000004a1d87 in discovery_op_complete (op=0x1748450, success=<optimized out>, ecode=<optimized out>) at src/shared/gatt-helpers.c:615
5  0x00000000004a2379 in discover_descs_cb (opcode=<optimized out>, pdu=0x174d551, length=<optimized out>, user_data=0x1748450) at src/shared/gatt-helpers.c:1465
6  0x00000000004966db in handle_rsp (pdu_len=4, pdu=<optimized out>, opcode=<optimized out>, chan=0x17483c0) at src/shared/att.c:814
7  can_read_data (io=<optimized out>, user_data=0x17483c0) at src/shared/att.c:1011
8  0x00000000004a0853 in watch_callback (channel=<optimized out>, cond=<optimized out>, user_data=<optimized out>) at src/shared/io-glib.c:157
9  0x00007fb3f2d7fe87 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0
10 0x00007fb3f2d80230 in ?? () from /usr/lib64/libglib-2.0.so.0
11 0x00007fb3f2d80542 in g_main_loop_run () from /usr/lib64/libglib-2.0.so.0
12 0x00000000004a0e25 in mainloop_run () at src/shared/mainloop-glib.c:66
13 0x00000000004a11f2 in mainloop_run_with_signal (func=func@entry=0x43f200 <signal_callback>, user_data=user_data@entry=0x0) at src/shared/mainloop-notify.c:188
14 0x000000000040c72e in main (argc=<optimized out>, argv=<optimized out>) at src/main.c:959

Reviewed-By: Archie Pusaka <apusaka@chromium.org>

---
 src/device.c             |  3 +++
 src/shared/gatt-client.c | 20 ++++++++++++++++++++
 src/shared/gatt-client.h |  2 ++
 3 files changed, 25 insertions(+)

Comments

bluez.test.bot@gmail.com Nov. 5, 2020, 12:53 a.m. UTC | #1
This is automated email and please do not reply to this email!

Dear submitter,

Thank you for submitting the patches to the linux bluetooth mailing list.
This is a CI test results with your patch series:
PW Link:https://patchwork.kernel.org/project/bluetooth/list/?series=377767

---Test result---

##############################
Test: CheckPatch - FAIL
Output:
gatt-client: Do not continue service discovery if disconnected
WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#12: 
0  gatt_db_service_get_handles (service=0x1751130, service=0x1751130, end_handle=0x7ffcd600806e, start_handle=0x7ffcd600806c) at src/shared/gatt-db.c:569

WARNING:BAD_SIGN_OFF: 'Reviewed-by:' is the preferred signature form
#28: 
Reviewed-By: Archie Pusaka <apusaka@chromium.org>


- total: 0 errors, 2 warnings, 73 lines checked

NOTE: For some of the reported defects, checkpatch may be able to
      mechanically convert to the typical style using --fix or --fix-inplace.

"[PATCH] gatt-client: Do not continue service discovery if" has style problems, please review.

NOTE: Ignored message types: COMMIT_MESSAGE COMPLEX_MACRO CONST_STRUCT FILE_PATH_CHANGES MISSING_SIGN_OFF PREFER_PACKED SPLIT_STRING SSCANF_TO_KSTRTO

NOTE: If any of the errors are false positives, please report
      them to the maintainer, see CHECKPATCH in MAINTAINERS.


##############################
Test: CheckGitLint - FAIL
Output:
gatt-client: Do not continue service discovery if disconnected
8: B1 Line exceeds max length (153>80): "0  gatt_db_service_get_handles (service=0x1751130, service=0x1751130, end_handle=0x7ffcd600806e, start_handle=0x7ffcd600806c) at src/shared/gatt-db.c:569"
9: B1 Line exceeds max length (170>80): "1  gatt_db_attribute_get_service_data (attrib=<optimized out>, start_handle=0x7ffcd600806c, end_handle=0x7ffcd600806e, primary=0x0, uuid=0x0) at src/shared/gatt-db.c:1657"
10: B1 Line exceeds max length (150>80): "2  0x00000000004983a8 in discovery_op_complete (op=op@entry=0x173b320, success=<optimized out>, err=err@entry=10 '\n') at src/shared/gatt-client.c:406"
11: B1 Line exceeds max length (172>80): "3  0x000000000049a548 in discover_descs_cb (success=<optimized out>, att_ecode=<optimized out>, result=<optimized out>, user_data=0x173b320) at src/shared/gatt-client.c:915"
12: B1 Line exceeds max length (142>80): "4  0x00000000004a1d87 in discovery_op_complete (op=0x1748450, success=<optimized out>, ecode=<optimized out>) at src/shared/gatt-helpers.c:615"
13: B1 Line exceeds max length (161>80): "5  0x00000000004a2379 in discover_descs_cb (opcode=<optimized out>, pdu=0x174d551, length=<optimized out>, user_data=0x1748450) at src/shared/gatt-helpers.c:1465"
14: B1 Line exceeds max length (132>80): "6  0x00000000004966db in handle_rsp (pdu_len=4, pdu=<optimized out>, opcode=<optimized out>, chan=0x17483c0) at src/shared/att.c:814"
15: B1 Line exceeds max length (83>80): "7  can_read_data (io=<optimized out>, user_data=0x17483c0) at src/shared/att.c:1011"
16: B1 Line exceeds max length (142>80): "8  0x00000000004a0853 in watch_callback (channel=<optimized out>, cond=<optimized out>, user_data=<optimized out>) at src/shared/io-glib.c:157"
17: B1 Line exceeds max length (84>80): "9  0x00007fb3f2d7fe87 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0"
21: B1 Line exceeds max length (161>80): "13 0x00000000004a11f2 in mainloop_run_with_signal (func=func@entry=0x43f200 <signal_callback>, user_data=user_data@entry=0x0) at src/shared/mainloop-notify.c:188"
22: B1 Line exceeds max length (92>80): "14 0x000000000040c72e in main (argc=<optimized out>, argv=<optimized out>) at src/main.c:959"


##############################
Test: CheckBuild - PASS

##############################
Test: MakeCheck - PASS



---
Regards,
Linux Bluetooth
Luiz Augusto von Dentz Nov. 5, 2020, 6:46 p.m. UTC | #2
Hi Sonny,

On Wed, Nov 4, 2020 at 10:01 PM Sonny Sasaka <sonnysasaka@chromium.org> wrote:
>

> If device is disconnected, discovery_complete_op should not continue

> processing since this can lead to a crash because the GATT-related

> objects may already be freed.

>

> Sample crash stack trace:

> 0  gatt_db_service_get_handles (service=0x1751130, service=0x1751130, end_handle=0x7ffcd600806e, start_handle=0x7ffcd600806c) at src/shared/gatt-db.c:569

> 1  gatt_db_attribute_get_service_data (attrib=<optimized out>, start_handle=0x7ffcd600806c, end_handle=0x7ffcd600806e, primary=0x0, uuid=0x0) at src/shared/gatt-db.c:1657

> 2  0x00000000004983a8 in discovery_op_complete (op=op@entry=0x173b320, success=<optimized out>, err=err@entry=10 '\n') at src/shared/gatt-client.c:406

> 3  0x000000000049a548 in discover_descs_cb (success=<optimized out>, att_ecode=<optimized out>, result=<optimized out>, user_data=0x173b320) at src/shared/gatt-client.c:915

> 4  0x00000000004a1d87 in discovery_op_complete (op=0x1748450, success=<optimized out>, ecode=<optimized out>) at src/shared/gatt-helpers.c:615

> 5  0x00000000004a2379 in discover_descs_cb (opcode=<optimized out>, pdu=0x174d551, length=<optimized out>, user_data=0x1748450) at src/shared/gatt-helpers.c:1465

> 6  0x00000000004966db in handle_rsp (pdu_len=4, pdu=<optimized out>, opcode=<optimized out>, chan=0x17483c0) at src/shared/att.c:814

> 7  can_read_data (io=<optimized out>, user_data=0x17483c0) at src/shared/att.c:1011


Weird, are you still receiving data after the device is disconnected?

> 8  0x00000000004a0853 in watch_callback (channel=<optimized out>, cond=<optimized out>, user_data=<optimized out>) at src/shared/io-glib.c:157

> 9  0x00007fb3f2d7fe87 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0

> 10 0x00007fb3f2d80230 in ?? () from /usr/lib64/libglib-2.0.so.0

> 11 0x00007fb3f2d80542 in g_main_loop_run () from /usr/lib64/libglib-2.0.so.0

> 12 0x00000000004a0e25 in mainloop_run () at src/shared/mainloop-glib.c:66

> 13 0x00000000004a11f2 in mainloop_run_with_signal (func=func@entry=0x43f200 <signal_callback>, user_data=user_data@entry=0x0) at src/shared/mainloop-notify.c:188

> 14 0x000000000040c72e in main (argc=<optimized out>, argv=<optimized out>) at src/main.c:959

>

> Reviewed-By: Archie Pusaka <apusaka@chromium.org>

>

> ---

>  src/device.c             |  3 +++

>  src/shared/gatt-client.c | 20 ++++++++++++++++++++

>  src/shared/gatt-client.h |  2 ++

>  3 files changed, 25 insertions(+)

>

> diff --git a/src/device.c b/src/device.c

> index a5ef46730..9dad746eb 100644

> --- a/src/device.c

> +++ b/src/device.c

> @@ -586,6 +586,9 @@ static void gatt_client_cleanup(struct btd_device *device)

>                 device->gatt_ready_id = 0;

>         }

>

> +       /* Make sure that service discovery is stopped if any is in progress */

> +       bt_gatt_client_reset_in_discovery(device->client);


Does this means that other references will also get interrupted? That
is something very weird here since when disconnected the bt_att
instance should actually have informed the bt_gatt_client and the
cleanup should had happened there.

> +

>         bt_gatt_client_unref(device->client);

>         device->client = NULL;

>  }

> diff --git a/src/shared/gatt-client.c b/src/shared/gatt-client.c

> index 8becf1c6c..2ba6efe9a 100644

> --- a/src/shared/gatt-client.c

> +++ b/src/shared/gatt-client.c

> @@ -104,6 +104,9 @@ struct bt_gatt_client {

>

>         struct bt_gatt_request *discovery_req;

>         unsigned int mtu_req_id;

> +

> +       /* Whether there is a service discovery operation ongoing */

> +       bool in_discovery;

>  };

>

>  struct request {

> @@ -381,6 +384,12 @@ static void discovery_op_complete(struct discovery_op *op, bool success,

>  {

>         const struct queue_entry *svc;

>

> +       /* Service discovery may be reset due to disconnection */

> +       if (!op->client->in_discovery)

> +               return;

> +

> +       op->client->in_discovery = false;

> +

>         op->success = success;

>

>         /* Read database hash if discovery has been successful */

> @@ -1857,6 +1866,9 @@ static void process_service_changed(struct bt_gatt_client *client,

>  {

>         struct discovery_op *op;

>

> +       if (client->in_discovery)

> +               goto fail;

> +

>         op = discovery_op_create(client, start_handle, end_handle,

>                                                 service_changed_complete,

>                                                 service_changed_failure);

> @@ -1869,6 +1881,7 @@ static void process_service_changed(struct bt_gatt_client *client,

>                                                 discovery_op_ref(op),

>                                                 discovery_op_unref);

>         if (client->discovery_req) {

> +               client->in_discovery = true;

>                 client->in_svc_chngd = true;

>                 return;

>         }

> @@ -2057,6 +2070,7 @@ static bool gatt_client_init(struct bt_gatt_client *client, uint16_t mtu)

>                 return false;

>         }

>

> +       client->in_discovery = true;

>         client->in_init = true;

>

>         return true;

> @@ -2080,6 +2094,7 @@ discover:

>         }

>

>  done:

> +       client->in_discovery = true;

>         client->in_init = true;

>         return true;

>  }

> @@ -3694,3 +3709,8 @@ int bt_gatt_client_get_security(struct bt_gatt_client *client)

>

>         return bt_att_get_security(client->att, NULL);

>  }

> +

> +void bt_gatt_client_reset_in_discovery(struct bt_gatt_client *client)

> +{

> +       client->in_discovery = false;

> +}

> diff --git a/src/shared/gatt-client.h b/src/shared/gatt-client.h

> index dc5102394..dcd8e5cf6 100644

> --- a/src/shared/gatt-client.h

> +++ b/src/shared/gatt-client.h

> @@ -126,3 +126,5 @@ bool bt_gatt_client_unregister_notify(struct bt_gatt_client *client,

>

>  bool bt_gatt_client_set_security(struct bt_gatt_client *client, int level);

>  int bt_gatt_client_get_security(struct bt_gatt_client *client);

> +

> +void bt_gatt_client_reset_in_discovery(struct bt_gatt_client *client);

> --

> 2.26.2

>



-- 
Luiz Augusto von Dentz
Sonny Sasaka Nov. 5, 2020, 7:20 p.m. UTC | #3
Hi Luiz,


On Thu, Nov 5, 2020 at 10:46 AM Luiz Augusto von Dentz
<luiz.dentz@gmail.com> wrote:
>

> Hi Sonny,

>

> On Wed, Nov 4, 2020 at 10:01 PM Sonny Sasaka <sonnysasaka@chromium.org> wrote:

> >

> > If device is disconnected, discovery_complete_op should not continue

> > processing since this can lead to a crash because the GATT-related

> > objects may already be freed.

> >

> > Sample crash stack trace:

> > 0  gatt_db_service_get_handles (service=0x1751130, service=0x1751130, end_handle=0x7ffcd600806e, start_handle=0x7ffcd600806c) at src/shared/gatt-db.c:569

> > 1  gatt_db_attribute_get_service_data (attrib=<optimized out>, start_handle=0x7ffcd600806c, end_handle=0x7ffcd600806e, primary=0x0, uuid=0x0) at src/shared/gatt-db.c:1657

> > 2  0x00000000004983a8 in discovery_op_complete (op=op@entry=0x173b320, success=<optimized out>, err=err@entry=10 '\n') at src/shared/gatt-client.c:406

> > 3  0x000000000049a548 in discover_descs_cb (success=<optimized out>, att_ecode=<optimized out>, result=<optimized out>, user_data=0x173b320) at src/shared/gatt-client.c:915

> > 4  0x00000000004a1d87 in discovery_op_complete (op=0x1748450, success=<optimized out>, ecode=<optimized out>) at src/shared/gatt-helpers.c:615

> > 5  0x00000000004a2379 in discover_descs_cb (opcode=<optimized out>, pdu=0x174d551, length=<optimized out>, user_data=0x1748450) at src/shared/gatt-helpers.c:1465

> > 6  0x00000000004966db in handle_rsp (pdu_len=4, pdu=<optimized out>, opcode=<optimized out>, chan=0x17483c0) at src/shared/att.c:814

> > 7  can_read_data (io=<optimized out>, user_data=0x17483c0) at src/shared/att.c:1011

>

> Weird, are you still receiving data after the device is disconnected?

To be more precise, this happens if I do "remove device" from
bluetoothctl while it's still doing GATT discovery, i.e. connect and
quickly remove device. Also this is reproducible if the GATT.Cache
config is set to "yes". So, the "disconnection" may not be physical
but a consequence of the code path of the RemoveDevice API. I can
reproduce it almost 100% of the time with an LE Mouse (tried with MS
Mouse 3600 and Logitech M355).
>

> > 8  0x00000000004a0853 in watch_callback (channel=<optimized out>, cond=<optimized out>, user_data=<optimized out>) at src/shared/io-glib.c:157

> > 9  0x00007fb3f2d7fe87 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0

> > 10 0x00007fb3f2d80230 in ?? () from /usr/lib64/libglib-2.0.so.0

> > 11 0x00007fb3f2d80542 in g_main_loop_run () from /usr/lib64/libglib-2.0.so.0

> > 12 0x00000000004a0e25 in mainloop_run () at src/shared/mainloop-glib.c:66

> > 13 0x00000000004a11f2 in mainloop_run_with_signal (func=func@entry=0x43f200 <signal_callback>, user_data=user_data@entry=0x0) at src/shared/mainloop-notify.c:188

> > 14 0x000000000040c72e in main (argc=<optimized out>, argv=<optimized out>) at src/main.c:959

> >

> > Reviewed-By: Archie Pusaka <apusaka@chromium.org>

> >

> > ---

> >  src/device.c             |  3 +++

> >  src/shared/gatt-client.c | 20 ++++++++++++++++++++

> >  src/shared/gatt-client.h |  2 ++

> >  3 files changed, 25 insertions(+)

> >

> > diff --git a/src/device.c b/src/device.c

> > index a5ef46730..9dad746eb 100644

> > --- a/src/device.c

> > +++ b/src/device.c

> > @@ -586,6 +586,9 @@ static void gatt_client_cleanup(struct btd_device *device)

> >                 device->gatt_ready_id = 0;

> >         }

> >

> > +       /* Make sure that service discovery is stopped if any is in progress */

> > +       bt_gatt_client_reset_in_discovery(device->client);

>

> Does this means that other references will also get interrupted? That

> is something very weird here since when disconnected the bt_att

> instance should actually have informed the bt_gatt_client and the

> cleanup should had happened there.

What do you mean by "other references"? My guess is that
bt_gatt_client has done the cleanup, but discovery_op_complete still
tries to execute the whole code path which involves accessing the
already cleaned-up gatt_db_attribute objects.
>

> > +

> >         bt_gatt_client_unref(device->client);

> >         device->client = NULL;

> >  }

> > diff --git a/src/shared/gatt-client.c b/src/shared/gatt-client.c

> > index 8becf1c6c..2ba6efe9a 100644

> > --- a/src/shared/gatt-client.c

> > +++ b/src/shared/gatt-client.c

> > @@ -104,6 +104,9 @@ struct bt_gatt_client {

> >

> >         struct bt_gatt_request *discovery_req;

> >         unsigned int mtu_req_id;

> > +

> > +       /* Whether there is a service discovery operation ongoing */

> > +       bool in_discovery;

> >  };

> >

> >  struct request {

> > @@ -381,6 +384,12 @@ static void discovery_op_complete(struct discovery_op *op, bool success,

> >  {

> >         const struct queue_entry *svc;

> >

> > +       /* Service discovery may be reset due to disconnection */

> > +       if (!op->client->in_discovery)

> > +               return;

> > +

> > +       op->client->in_discovery = false;

> > +

> >         op->success = success;

> >

> >         /* Read database hash if discovery has been successful */

> > @@ -1857,6 +1866,9 @@ static void process_service_changed(struct bt_gatt_client *client,

> >  {

> >         struct discovery_op *op;

> >

> > +       if (client->in_discovery)

> > +               goto fail;

> > +

> >         op = discovery_op_create(client, start_handle, end_handle,

> >                                                 service_changed_complete,

> >                                                 service_changed_failure);

> > @@ -1869,6 +1881,7 @@ static void process_service_changed(struct bt_gatt_client *client,

> >                                                 discovery_op_ref(op),

> >                                                 discovery_op_unref);

> >         if (client->discovery_req) {

> > +               client->in_discovery = true;

> >                 client->in_svc_chngd = true;

> >                 return;

> >         }

> > @@ -2057,6 +2070,7 @@ static bool gatt_client_init(struct bt_gatt_client *client, uint16_t mtu)

> >                 return false;

> >         }

> >

> > +       client->in_discovery = true;

> >         client->in_init = true;

> >

> >         return true;

> > @@ -2080,6 +2094,7 @@ discover:

> >         }

> >

> >  done:

> > +       client->in_discovery = true;

> >         client->in_init = true;

> >         return true;

> >  }

> > @@ -3694,3 +3709,8 @@ int bt_gatt_client_get_security(struct bt_gatt_client *client)

> >

> >         return bt_att_get_security(client->att, NULL);

> >  }

> > +

> > +void bt_gatt_client_reset_in_discovery(struct bt_gatt_client *client)

> > +{

> > +       client->in_discovery = false;

> > +}

> > diff --git a/src/shared/gatt-client.h b/src/shared/gatt-client.h

> > index dc5102394..dcd8e5cf6 100644

> > --- a/src/shared/gatt-client.h

> > +++ b/src/shared/gatt-client.h

> > @@ -126,3 +126,5 @@ bool bt_gatt_client_unregister_notify(struct bt_gatt_client *client,

> >

> >  bool bt_gatt_client_set_security(struct bt_gatt_client *client, int level);

> >  int bt_gatt_client_get_security(struct bt_gatt_client *client);

> > +

> > +void bt_gatt_client_reset_in_discovery(struct bt_gatt_client *client);

> > --

> > 2.26.2

> >

>

>

> --

> Luiz Augusto von Dentz
Luiz Augusto von Dentz Nov. 5, 2020, 9:03 p.m. UTC | #4
Hi Sonny,

On Thu, Nov 5, 2020 at 11:20 AM Sonny Sasaka <sonnysasaka@chromium.org> wrote:
>

> Hi Luiz,

>

>

> On Thu, Nov 5, 2020 at 10:46 AM Luiz Augusto von Dentz

> <luiz.dentz@gmail.com> wrote:

> >

> > Hi Sonny,

> >

> > On Wed, Nov 4, 2020 at 10:01 PM Sonny Sasaka <sonnysasaka@chromium.org> wrote:

> > >

> > > If device is disconnected, discovery_complete_op should not continue

> > > processing since this can lead to a crash because the GATT-related

> > > objects may already be freed.

> > >

> > > Sample crash stack trace:

> > > 0  gatt_db_service_get_handles (service=0x1751130, service=0x1751130, end_handle=0x7ffcd600806e, start_handle=0x7ffcd600806c) at src/shared/gatt-db.c:569

> > > 1  gatt_db_attribute_get_service_data (attrib=<optimized out>, start_handle=0x7ffcd600806c, end_handle=0x7ffcd600806e, primary=0x0, uuid=0x0) at src/shared/gatt-db.c:1657

> > > 2  0x00000000004983a8 in discovery_op_complete (op=op@entry=0x173b320, success=<optimized out>, err=err@entry=10 '\n') at src/shared/gatt-client.c:406

> > > 3  0x000000000049a548 in discover_descs_cb (success=<optimized out>, att_ecode=<optimized out>, result=<optimized out>, user_data=0x173b320) at src/shared/gatt-client.c:915

> > > 4  0x00000000004a1d87 in discovery_op_complete (op=0x1748450, success=<optimized out>, ecode=<optimized out>) at src/shared/gatt-helpers.c:615

> > > 5  0x00000000004a2379 in discover_descs_cb (opcode=<optimized out>, pdu=0x174d551, length=<optimized out>, user_data=0x1748450) at src/shared/gatt-helpers.c:1465

> > > 6  0x00000000004966db in handle_rsp (pdu_len=4, pdu=<optimized out>, opcode=<optimized out>, chan=0x17483c0) at src/shared/att.c:814

> > > 7  can_read_data (io=<optimized out>, user_data=0x17483c0) at src/shared/att.c:1011

> >

> > Weird, are you still receiving data after the device is disconnected?

> To be more precise, this happens if I do "remove device" from

> bluetoothctl while it's still doing GATT discovery, i.e. connect and

> quickly remove device. Also this is reproducible if the GATT.Cache

> config is set to "yes". So, the "disconnection" may not be physical

> but a consequence of the code path of the RemoveDevice API. I can

> reproduce it almost 100% of the time with an LE Mouse (tried with MS

> Mouse 3600 and Logitech M355).

> >

> > > 8  0x00000000004a0853 in watch_callback (channel=<optimized out>, cond=<optimized out>, user_data=<optimized out>) at src/shared/io-glib.c:157

> > > 9  0x00007fb3f2d7fe87 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0

> > > 10 0x00007fb3f2d80230 in ?? () from /usr/lib64/libglib-2.0.so.0

> > > 11 0x00007fb3f2d80542 in g_main_loop_run () from /usr/lib64/libglib-2.0.so.0

> > > 12 0x00000000004a0e25 in mainloop_run () at src/shared/mainloop-glib.c:66

> > > 13 0x00000000004a11f2 in mainloop_run_with_signal (func=func@entry=0x43f200 <signal_callback>, user_data=user_data@entry=0x0) at src/shared/mainloop-notify.c:188

> > > 14 0x000000000040c72e in main (argc=<optimized out>, argv=<optimized out>) at src/main.c:959

> > >

> > > Reviewed-By: Archie Pusaka <apusaka@chromium.org>

> > >

> > > ---

> > >  src/device.c             |  3 +++

> > >  src/shared/gatt-client.c | 20 ++++++++++++++++++++

> > >  src/shared/gatt-client.h |  2 ++

> > >  3 files changed, 25 insertions(+)

> > >

> > > diff --git a/src/device.c b/src/device.c

> > > index a5ef46730..9dad746eb 100644

> > > --- a/src/device.c

> > > +++ b/src/device.c

> > > @@ -586,6 +586,9 @@ static void gatt_client_cleanup(struct btd_device *device)

> > >                 device->gatt_ready_id = 0;

> > >         }

> > >

> > > +       /* Make sure that service discovery is stopped if any is in progress */

> > > +       bt_gatt_client_reset_in_discovery(device->client);

> >

> > Does this means that other references will also get interrupted? That

> > is something very weird here since when disconnected the bt_att

> > instance should actually have informed the bt_gatt_client and the

> > cleanup should had happened there.

> What do you mean by "other references"? My guess is that

> bt_gatt_client has done the cleanup, but discovery_op_complete still

> tries to execute the whole code path which involves accessing the

> already cleaned-up gatt_db_attribute objects.


Here normally what should have happened is:

bt_gatt_client_unref -> bt_gatt_client_free ->
bt_gatt_client_cancel_all -> bt_gatt_request_unref ->
discovery_op_unref -> discovery_op_free

So there is something about this code path that is not really working
as intended or perhaps bt_gatt_client_free is never really called? It
would perhaps be worth trying to reproduce this with a a unit test.

> >

> > > +

> > >         bt_gatt_client_unref(device->client);

> > >         device->client = NULL;

> > >  }

> > > diff --git a/src/shared/gatt-client.c b/src/shared/gatt-client.c

> > > index 8becf1c6c..2ba6efe9a 100644

> > > --- a/src/shared/gatt-client.c

> > > +++ b/src/shared/gatt-client.c

> > > @@ -104,6 +104,9 @@ struct bt_gatt_client {

> > >

> > >         struct bt_gatt_request *discovery_req;

> > >         unsigned int mtu_req_id;

> > > +

> > > +       /* Whether there is a service discovery operation ongoing */

> > > +       bool in_discovery;

> > >  };

> > >

> > >  struct request {

> > > @@ -381,6 +384,12 @@ static void discovery_op_complete(struct discovery_op *op, bool success,

> > >  {

> > >         const struct queue_entry *svc;

> > >

> > > +       /* Service discovery may be reset due to disconnection */

> > > +       if (!op->client->in_discovery)

> > > +               return;

> > > +

> > > +       op->client->in_discovery = false;

> > > +

> > >         op->success = success;

> > >

> > >         /* Read database hash if discovery has been successful */

> > > @@ -1857,6 +1866,9 @@ static void process_service_changed(struct bt_gatt_client *client,

> > >  {

> > >         struct discovery_op *op;

> > >

> > > +       if (client->in_discovery)

> > > +               goto fail;

> > > +

> > >         op = discovery_op_create(client, start_handle, end_handle,

> > >                                                 service_changed_complete,

> > >                                                 service_changed_failure);

> > > @@ -1869,6 +1881,7 @@ static void process_service_changed(struct bt_gatt_client *client,

> > >                                                 discovery_op_ref(op),

> > >                                                 discovery_op_unref);

> > >         if (client->discovery_req) {

> > > +               client->in_discovery = true;

> > >                 client->in_svc_chngd = true;

> > >                 return;

> > >         }

> > > @@ -2057,6 +2070,7 @@ static bool gatt_client_init(struct bt_gatt_client *client, uint16_t mtu)

> > >                 return false;

> > >         }

> > >

> > > +       client->in_discovery = true;

> > >         client->in_init = true;

> > >

> > >         return true;

> > > @@ -2080,6 +2094,7 @@ discover:

> > >         }

> > >

> > >  done:

> > > +       client->in_discovery = true;

> > >         client->in_init = true;

> > >         return true;

> > >  }

> > > @@ -3694,3 +3709,8 @@ int bt_gatt_client_get_security(struct bt_gatt_client *client)

> > >

> > >         return bt_att_get_security(client->att, NULL);

> > >  }

> > > +

> > > +void bt_gatt_client_reset_in_discovery(struct bt_gatt_client *client)

> > > +{

> > > +       client->in_discovery = false;

> > > +}

> > > diff --git a/src/shared/gatt-client.h b/src/shared/gatt-client.h

> > > index dc5102394..dcd8e5cf6 100644

> > > --- a/src/shared/gatt-client.h

> > > +++ b/src/shared/gatt-client.h

> > > @@ -126,3 +126,5 @@ bool bt_gatt_client_unregister_notify(struct bt_gatt_client *client,

> > >

> > >  bool bt_gatt_client_set_security(struct bt_gatt_client *client, int level);

> > >  int bt_gatt_client_get_security(struct bt_gatt_client *client);

> > > +

> > > +void bt_gatt_client_reset_in_discovery(struct bt_gatt_client *client);

> > > --

> > > 2.26.2

> > >

> >

> >

> > --

> > Luiz Augusto von Dentz




-- 
Luiz Augusto von Dentz
Sonny Sasaka Nov. 5, 2020, 9:10 p.m. UTC | #5
Hi Luiz,

bt_gatt_client is not freed. It is expected that bt_gatt_client is
still alive at discovery_op_complete since the code clearly references
it. It is the gatt_db_attribute objects in client->pending_svcs that
are already freed.

On Thu, Nov 5, 2020 at 1:03 PM Luiz Augusto von Dentz
<luiz.dentz@gmail.com> wrote:
>

> Hi Sonny,

>

> On Thu, Nov 5, 2020 at 11:20 AM Sonny Sasaka <sonnysasaka@chromium.org> wrote:

> >

> > Hi Luiz,

> >

> >

> > On Thu, Nov 5, 2020 at 10:46 AM Luiz Augusto von Dentz

> > <luiz.dentz@gmail.com> wrote:

> > >

> > > Hi Sonny,

> > >

> > > On Wed, Nov 4, 2020 at 10:01 PM Sonny Sasaka <sonnysasaka@chromium.org> wrote:

> > > >

> > > > If device is disconnected, discovery_complete_op should not continue

> > > > processing since this can lead to a crash because the GATT-related

> > > > objects may already be freed.

> > > >

> > > > Sample crash stack trace:

> > > > 0  gatt_db_service_get_handles (service=0x1751130, service=0x1751130, end_handle=0x7ffcd600806e, start_handle=0x7ffcd600806c) at src/shared/gatt-db.c:569

> > > > 1  gatt_db_attribute_get_service_data (attrib=<optimized out>, start_handle=0x7ffcd600806c, end_handle=0x7ffcd600806e, primary=0x0, uuid=0x0) at src/shared/gatt-db.c:1657

> > > > 2  0x00000000004983a8 in discovery_op_complete (op=op@entry=0x173b320, success=<optimized out>, err=err@entry=10 '\n') at src/shared/gatt-client.c:406

> > > > 3  0x000000000049a548 in discover_descs_cb (success=<optimized out>, att_ecode=<optimized out>, result=<optimized out>, user_data=0x173b320) at src/shared/gatt-client.c:915

> > > > 4  0x00000000004a1d87 in discovery_op_complete (op=0x1748450, success=<optimized out>, ecode=<optimized out>) at src/shared/gatt-helpers.c:615

> > > > 5  0x00000000004a2379 in discover_descs_cb (opcode=<optimized out>, pdu=0x174d551, length=<optimized out>, user_data=0x1748450) at src/shared/gatt-helpers.c:1465

> > > > 6  0x00000000004966db in handle_rsp (pdu_len=4, pdu=<optimized out>, opcode=<optimized out>, chan=0x17483c0) at src/shared/att.c:814

> > > > 7  can_read_data (io=<optimized out>, user_data=0x17483c0) at src/shared/att.c:1011

> > >

> > > Weird, are you still receiving data after the device is disconnected?

> > To be more precise, this happens if I do "remove device" from

> > bluetoothctl while it's still doing GATT discovery, i.e. connect and

> > quickly remove device. Also this is reproducible if the GATT.Cache

> > config is set to "yes". So, the "disconnection" may not be physical

> > but a consequence of the code path of the RemoveDevice API. I can

> > reproduce it almost 100% of the time with an LE Mouse (tried with MS

> > Mouse 3600 and Logitech M355).

> > >

> > > > 8  0x00000000004a0853 in watch_callback (channel=<optimized out>, cond=<optimized out>, user_data=<optimized out>) at src/shared/io-glib.c:157

> > > > 9  0x00007fb3f2d7fe87 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0

> > > > 10 0x00007fb3f2d80230 in ?? () from /usr/lib64/libglib-2.0.so.0

> > > > 11 0x00007fb3f2d80542 in g_main_loop_run () from /usr/lib64/libglib-2.0.so.0

> > > > 12 0x00000000004a0e25 in mainloop_run () at src/shared/mainloop-glib.c:66

> > > > 13 0x00000000004a11f2 in mainloop_run_with_signal (func=func@entry=0x43f200 <signal_callback>, user_data=user_data@entry=0x0) at src/shared/mainloop-notify.c:188

> > > > 14 0x000000000040c72e in main (argc=<optimized out>, argv=<optimized out>) at src/main.c:959

> > > >

> > > > Reviewed-By: Archie Pusaka <apusaka@chromium.org>

> > > >

> > > > ---

> > > >  src/device.c             |  3 +++

> > > >  src/shared/gatt-client.c | 20 ++++++++++++++++++++

> > > >  src/shared/gatt-client.h |  2 ++

> > > >  3 files changed, 25 insertions(+)

> > > >

> > > > diff --git a/src/device.c b/src/device.c

> > > > index a5ef46730..9dad746eb 100644

> > > > --- a/src/device.c

> > > > +++ b/src/device.c

> > > > @@ -586,6 +586,9 @@ static void gatt_client_cleanup(struct btd_device *device)

> > > >                 device->gatt_ready_id = 0;

> > > >         }

> > > >

> > > > +       /* Make sure that service discovery is stopped if any is in progress */

> > > > +       bt_gatt_client_reset_in_discovery(device->client);

> > >

> > > Does this means that other references will also get interrupted? That

> > > is something very weird here since when disconnected the bt_att

> > > instance should actually have informed the bt_gatt_client and the

> > > cleanup should had happened there.

> > What do you mean by "other references"? My guess is that

> > bt_gatt_client has done the cleanup, but discovery_op_complete still

> > tries to execute the whole code path which involves accessing the

> > already cleaned-up gatt_db_attribute objects.

>

> Here normally what should have happened is:

>

> bt_gatt_client_unref -> bt_gatt_client_free ->

> bt_gatt_client_cancel_all -> bt_gatt_request_unref ->

> discovery_op_unref -> discovery_op_free

>

> So there is something about this code path that is not really working

> as intended or perhaps bt_gatt_client_free is never really called? It

> would perhaps be worth trying to reproduce this with a a unit test.

>

> > >

> > > > +

> > > >         bt_gatt_client_unref(device->client);

> > > >         device->client = NULL;

> > > >  }

> > > > diff --git a/src/shared/gatt-client.c b/src/shared/gatt-client.c

> > > > index 8becf1c6c..2ba6efe9a 100644

> > > > --- a/src/shared/gatt-client.c

> > > > +++ b/src/shared/gatt-client.c

> > > > @@ -104,6 +104,9 @@ struct bt_gatt_client {

> > > >

> > > >         struct bt_gatt_request *discovery_req;

> > > >         unsigned int mtu_req_id;

> > > > +

> > > > +       /* Whether there is a service discovery operation ongoing */

> > > > +       bool in_discovery;

> > > >  };

> > > >

> > > >  struct request {

> > > > @@ -381,6 +384,12 @@ static void discovery_op_complete(struct discovery_op *op, bool success,

> > > >  {

> > > >         const struct queue_entry *svc;

> > > >

> > > > +       /* Service discovery may be reset due to disconnection */

> > > > +       if (!op->client->in_discovery)

> > > > +               return;

> > > > +

> > > > +       op->client->in_discovery = false;

> > > > +

> > > >         op->success = success;

> > > >

> > > >         /* Read database hash if discovery has been successful */

> > > > @@ -1857,6 +1866,9 @@ static void process_service_changed(struct bt_gatt_client *client,

> > > >  {

> > > >         struct discovery_op *op;

> > > >

> > > > +       if (client->in_discovery)

> > > > +               goto fail;

> > > > +

> > > >         op = discovery_op_create(client, start_handle, end_handle,

> > > >                                                 service_changed_complete,

> > > >                                                 service_changed_failure);

> > > > @@ -1869,6 +1881,7 @@ static void process_service_changed(struct bt_gatt_client *client,

> > > >                                                 discovery_op_ref(op),

> > > >                                                 discovery_op_unref);

> > > >         if (client->discovery_req) {

> > > > +               client->in_discovery = true;

> > > >                 client->in_svc_chngd = true;

> > > >                 return;

> > > >         }

> > > > @@ -2057,6 +2070,7 @@ static bool gatt_client_init(struct bt_gatt_client *client, uint16_t mtu)

> > > >                 return false;

> > > >         }

> > > >

> > > > +       client->in_discovery = true;

> > > >         client->in_init = true;

> > > >

> > > >         return true;

> > > > @@ -2080,6 +2094,7 @@ discover:

> > > >         }

> > > >

> > > >  done:

> > > > +       client->in_discovery = true;

> > > >         client->in_init = true;

> > > >         return true;

> > > >  }

> > > > @@ -3694,3 +3709,8 @@ int bt_gatt_client_get_security(struct bt_gatt_client *client)

> > > >

> > > >         return bt_att_get_security(client->att, NULL);

> > > >  }

> > > > +

> > > > +void bt_gatt_client_reset_in_discovery(struct bt_gatt_client *client)

> > > > +{

> > > > +       client->in_discovery = false;

> > > > +}

> > > > diff --git a/src/shared/gatt-client.h b/src/shared/gatt-client.h

> > > > index dc5102394..dcd8e5cf6 100644

> > > > --- a/src/shared/gatt-client.h

> > > > +++ b/src/shared/gatt-client.h

> > > > @@ -126,3 +126,5 @@ bool bt_gatt_client_unregister_notify(struct bt_gatt_client *client,

> > > >

> > > >  bool bt_gatt_client_set_security(struct bt_gatt_client *client, int level);

> > > >  int bt_gatt_client_get_security(struct bt_gatt_client *client);

> > > > +

> > > > +void bt_gatt_client_reset_in_discovery(struct bt_gatt_client *client);

> > > > --

> > > > 2.26.2

> > > >

> > >

> > >

> > > --

> > > Luiz Augusto von Dentz

>

>

>

> --

> Luiz Augusto von Dentz
Luiz Augusto von Dentz Nov. 5, 2020, 9:27 p.m. UTC | #6
Hi Sonny,

On Thu, Nov 5, 2020 at 1:10 PM Sonny Sasaka <sonnysasaka@chromium.org> wrote:
>

> Hi Luiz,

>

> bt_gatt_client is not freed. It is expected that bt_gatt_client is

> still alive at discovery_op_complete since the code clearly references

> it. It is the gatt_db_attribute objects in client->pending_svcs that

> are already freed.


That probably means there are other references, also if the attributes
are being freed that probably means gatt_db_clear has been called
which would explain why I could not reproduce, that said we should
invoke bt_gatt_client_cancel_all before cleanup the db.

> On Thu, Nov 5, 2020 at 1:03 PM Luiz Augusto von Dentz

> <luiz.dentz@gmail.com> wrote:

> >

> > Hi Sonny,

> >

> > On Thu, Nov 5, 2020 at 11:20 AM Sonny Sasaka <sonnysasaka@chromium.org> wrote:

> > >

> > > Hi Luiz,

> > >

> > >

> > > On Thu, Nov 5, 2020 at 10:46 AM Luiz Augusto von Dentz

> > > <luiz.dentz@gmail.com> wrote:

> > > >

> > > > Hi Sonny,

> > > >

> > > > On Wed, Nov 4, 2020 at 10:01 PM Sonny Sasaka <sonnysasaka@chromium.org> wrote:

> > > > >

> > > > > If device is disconnected, discovery_complete_op should not continue

> > > > > processing since this can lead to a crash because the GATT-related

> > > > > objects may already be freed.

> > > > >

> > > > > Sample crash stack trace:

> > > > > 0  gatt_db_service_get_handles (service=0x1751130, service=0x1751130, end_handle=0x7ffcd600806e, start_handle=0x7ffcd600806c) at src/shared/gatt-db.c:569

> > > > > 1  gatt_db_attribute_get_service_data (attrib=<optimized out>, start_handle=0x7ffcd600806c, end_handle=0x7ffcd600806e, primary=0x0, uuid=0x0) at src/shared/gatt-db.c:1657

> > > > > 2  0x00000000004983a8 in discovery_op_complete (op=op@entry=0x173b320, success=<optimized out>, err=err@entry=10 '\n') at src/shared/gatt-client.c:406

> > > > > 3  0x000000000049a548 in discover_descs_cb (success=<optimized out>, att_ecode=<optimized out>, result=<optimized out>, user_data=0x173b320) at src/shared/gatt-client.c:915

> > > > > 4  0x00000000004a1d87 in discovery_op_complete (op=0x1748450, success=<optimized out>, ecode=<optimized out>) at src/shared/gatt-helpers.c:615

> > > > > 5  0x00000000004a2379 in discover_descs_cb (opcode=<optimized out>, pdu=0x174d551, length=<optimized out>, user_data=0x1748450) at src/shared/gatt-helpers.c:1465

> > > > > 6  0x00000000004966db in handle_rsp (pdu_len=4, pdu=<optimized out>, opcode=<optimized out>, chan=0x17483c0) at src/shared/att.c:814

> > > > > 7  can_read_data (io=<optimized out>, user_data=0x17483c0) at src/shared/att.c:1011

> > > >

> > > > Weird, are you still receiving data after the device is disconnected?

> > > To be more precise, this happens if I do "remove device" from

> > > bluetoothctl while it's still doing GATT discovery, i.e. connect and

> > > quickly remove device. Also this is reproducible if the GATT.Cache

> > > config is set to "yes". So, the "disconnection" may not be physical

> > > but a consequence of the code path of the RemoveDevice API. I can

> > > reproduce it almost 100% of the time with an LE Mouse (tried with MS

> > > Mouse 3600 and Logitech M355).

> > > >

> > > > > 8  0x00000000004a0853 in watch_callback (channel=<optimized out>, cond=<optimized out>, user_data=<optimized out>) at src/shared/io-glib.c:157

> > > > > 9  0x00007fb3f2d7fe87 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0

> > > > > 10 0x00007fb3f2d80230 in ?? () from /usr/lib64/libglib-2.0.so.0

> > > > > 11 0x00007fb3f2d80542 in g_main_loop_run () from /usr/lib64/libglib-2.0.so.0

> > > > > 12 0x00000000004a0e25 in mainloop_run () at src/shared/mainloop-glib.c:66

> > > > > 13 0x00000000004a11f2 in mainloop_run_with_signal (func=func@entry=0x43f200 <signal_callback>, user_data=user_data@entry=0x0) at src/shared/mainloop-notify.c:188

> > > > > 14 0x000000000040c72e in main (argc=<optimized out>, argv=<optimized out>) at src/main.c:959

> > > > >

> > > > > Reviewed-By: Archie Pusaka <apusaka@chromium.org>

> > > > >

> > > > > ---

> > > > >  src/device.c             |  3 +++

> > > > >  src/shared/gatt-client.c | 20 ++++++++++++++++++++

> > > > >  src/shared/gatt-client.h |  2 ++

> > > > >  3 files changed, 25 insertions(+)

> > > > >

> > > > > diff --git a/src/device.c b/src/device.c

> > > > > index a5ef46730..9dad746eb 100644

> > > > > --- a/src/device.c

> > > > > +++ b/src/device.c

> > > > > @@ -586,6 +586,9 @@ static void gatt_client_cleanup(struct btd_device *device)

> > > > >                 device->gatt_ready_id = 0;

> > > > >         }

> > > > >

> > > > > +       /* Make sure that service discovery is stopped if any is in progress */

> > > > > +       bt_gatt_client_reset_in_discovery(device->client);

> > > >

> > > > Does this means that other references will also get interrupted? That

> > > > is something very weird here since when disconnected the bt_att

> > > > instance should actually have informed the bt_gatt_client and the

> > > > cleanup should had happened there.

> > > What do you mean by "other references"? My guess is that

> > > bt_gatt_client has done the cleanup, but discovery_op_complete still

> > > tries to execute the whole code path which involves accessing the

> > > already cleaned-up gatt_db_attribute objects.

> >

> > Here normally what should have happened is:

> >

> > bt_gatt_client_unref -> bt_gatt_client_free ->

> > bt_gatt_client_cancel_all -> bt_gatt_request_unref ->

> > discovery_op_unref -> discovery_op_free

> >

> > So there is something about this code path that is not really working

> > as intended or perhaps bt_gatt_client_free is never really called? It

> > would perhaps be worth trying to reproduce this with a a unit test.

> >

> > > >

> > > > > +

> > > > >         bt_gatt_client_unref(device->client);

> > > > >         device->client = NULL;

> > > > >  }

> > > > > diff --git a/src/shared/gatt-client.c b/src/shared/gatt-client.c

> > > > > index 8becf1c6c..2ba6efe9a 100644

> > > > > --- a/src/shared/gatt-client.c

> > > > > +++ b/src/shared/gatt-client.c

> > > > > @@ -104,6 +104,9 @@ struct bt_gatt_client {

> > > > >

> > > > >         struct bt_gatt_request *discovery_req;

> > > > >         unsigned int mtu_req_id;

> > > > > +

> > > > > +       /* Whether there is a service discovery operation ongoing */

> > > > > +       bool in_discovery;

> > > > >  };

> > > > >

> > > > >  struct request {

> > > > > @@ -381,6 +384,12 @@ static void discovery_op_complete(struct discovery_op *op, bool success,

> > > > >  {

> > > > >         const struct queue_entry *svc;

> > > > >

> > > > > +       /* Service discovery may be reset due to disconnection */

> > > > > +       if (!op->client->in_discovery)

> > > > > +               return;

> > > > > +

> > > > > +       op->client->in_discovery = false;

> > > > > +

> > > > >         op->success = success;

> > > > >

> > > > >         /* Read database hash if discovery has been successful */

> > > > > @@ -1857,6 +1866,9 @@ static void process_service_changed(struct bt_gatt_client *client,

> > > > >  {

> > > > >         struct discovery_op *op;

> > > > >

> > > > > +       if (client->in_discovery)

> > > > > +               goto fail;

> > > > > +

> > > > >         op = discovery_op_create(client, start_handle, end_handle,

> > > > >                                                 service_changed_complete,

> > > > >                                                 service_changed_failure);

> > > > > @@ -1869,6 +1881,7 @@ static void process_service_changed(struct bt_gatt_client *client,

> > > > >                                                 discovery_op_ref(op),

> > > > >                                                 discovery_op_unref);

> > > > >         if (client->discovery_req) {

> > > > > +               client->in_discovery = true;

> > > > >                 client->in_svc_chngd = true;

> > > > >                 return;

> > > > >         }

> > > > > @@ -2057,6 +2070,7 @@ static bool gatt_client_init(struct bt_gatt_client *client, uint16_t mtu)

> > > > >                 return false;

> > > > >         }

> > > > >

> > > > > +       client->in_discovery = true;

> > > > >         client->in_init = true;

> > > > >

> > > > >         return true;

> > > > > @@ -2080,6 +2094,7 @@ discover:

> > > > >         }

> > > > >

> > > > >  done:

> > > > > +       client->in_discovery = true;

> > > > >         client->in_init = true;

> > > > >         return true;

> > > > >  }

> > > > > @@ -3694,3 +3709,8 @@ int bt_gatt_client_get_security(struct bt_gatt_client *client)

> > > > >

> > > > >         return bt_att_get_security(client->att, NULL);

> > > > >  }

> > > > > +

> > > > > +void bt_gatt_client_reset_in_discovery(struct bt_gatt_client *client)

> > > > > +{

> > > > > +       client->in_discovery = false;

> > > > > +}

> > > > > diff --git a/src/shared/gatt-client.h b/src/shared/gatt-client.h

> > > > > index dc5102394..dcd8e5cf6 100644

> > > > > --- a/src/shared/gatt-client.h

> > > > > +++ b/src/shared/gatt-client.h

> > > > > @@ -126,3 +126,5 @@ bool bt_gatt_client_unregister_notify(struct bt_gatt_client *client,

> > > > >

> > > > >  bool bt_gatt_client_set_security(struct bt_gatt_client *client, int level);

> > > > >  int bt_gatt_client_get_security(struct bt_gatt_client *client);

> > > > > +

> > > > > +void bt_gatt_client_reset_in_discovery(struct bt_gatt_client *client);

> > > > > --

> > > > > 2.26.2

> > > > >

> > > >

> > > >

> > > > --

> > > > Luiz Augusto von Dentz

> >

> >

> >

> > --

> > Luiz Augusto von Dentz




-- 
Luiz Augusto von Dentz
Sonny Sasaka Nov. 5, 2020, 10:54 p.m. UTC | #7
Hi Luiz,

On Thu, Nov 5, 2020 at 1:28 PM Luiz Augusto von Dentz
<luiz.dentz@gmail.com> wrote:
>

> Hi Sonny,

>

> On Thu, Nov 5, 2020 at 1:10 PM Sonny Sasaka <sonnysasaka@chromium.org> wrote:

> >

> > Hi Luiz,

> >

> > bt_gatt_client is not freed. It is expected that bt_gatt_client is

> > still alive at discovery_op_complete since the code clearly references

> > it. It is the gatt_db_attribute objects in client->pending_svcs that

> > are already freed.

>

> That probably means there are other references, also if the attributes

> are being freed that probably means gatt_db_clear has been called

> which would explain why I could not reproduce, that said we should

> invoke bt_gatt_client_cancel_all before cleanup the db.

Yes there are other references. So we can't rely on gatt_client_free
to do the cancel_all. Instead, how about we do the
bt_gatt_client_cancel_all before clearing the db when cleaning up the
cache? I have tested and it fixes the crash for me. (if you're curious
about the repro, maybe you need to set GATT.Cache = "yes" in the
config). Please take a look at my other patch called "Cancel GATT
client operations when cleaning up GATT cache".

>

> > On Thu, Nov 5, 2020 at 1:03 PM Luiz Augusto von Dentz

> > <luiz.dentz@gmail.com> wrote:

> > >

> > > Hi Sonny,

> > >

> > > On Thu, Nov 5, 2020 at 11:20 AM Sonny Sasaka <sonnysasaka@chromium.org> wrote:

> > > >

> > > > Hi Luiz,

> > > >

> > > >

> > > > On Thu, Nov 5, 2020 at 10:46 AM Luiz Augusto von Dentz

> > > > <luiz.dentz@gmail.com> wrote:

> > > > >

> > > > > Hi Sonny,

> > > > >

> > > > > On Wed, Nov 4, 2020 at 10:01 PM Sonny Sasaka <sonnysasaka@chromium.org> wrote:

> > > > > >

> > > > > > If device is disconnected, discovery_complete_op should not continue

> > > > > > processing since this can lead to a crash because the GATT-related

> > > > > > objects may already be freed.

> > > > > >

> > > > > > Sample crash stack trace:

> > > > > > 0  gatt_db_service_get_handles (service=0x1751130, service=0x1751130, end_handle=0x7ffcd600806e, start_handle=0x7ffcd600806c) at src/shared/gatt-db.c:569

> > > > > > 1  gatt_db_attribute_get_service_data (attrib=<optimized out>, start_handle=0x7ffcd600806c, end_handle=0x7ffcd600806e, primary=0x0, uuid=0x0) at src/shared/gatt-db.c:1657

> > > > > > 2  0x00000000004983a8 in discovery_op_complete (op=op@entry=0x173b320, success=<optimized out>, err=err@entry=10 '\n') at src/shared/gatt-client.c:406

> > > > > > 3  0x000000000049a548 in discover_descs_cb (success=<optimized out>, att_ecode=<optimized out>, result=<optimized out>, user_data=0x173b320) at src/shared/gatt-client.c:915

> > > > > > 4  0x00000000004a1d87 in discovery_op_complete (op=0x1748450, success=<optimized out>, ecode=<optimized out>) at src/shared/gatt-helpers.c:615

> > > > > > 5  0x00000000004a2379 in discover_descs_cb (opcode=<optimized out>, pdu=0x174d551, length=<optimized out>, user_data=0x1748450) at src/shared/gatt-helpers.c:1465

> > > > > > 6  0x00000000004966db in handle_rsp (pdu_len=4, pdu=<optimized out>, opcode=<optimized out>, chan=0x17483c0) at src/shared/att.c:814

> > > > > > 7  can_read_data (io=<optimized out>, user_data=0x17483c0) at src/shared/att.c:1011

> > > > >

> > > > > Weird, are you still receiving data after the device is disconnected?

> > > > To be more precise, this happens if I do "remove device" from

> > > > bluetoothctl while it's still doing GATT discovery, i.e. connect and

> > > > quickly remove device. Also this is reproducible if the GATT.Cache

> > > > config is set to "yes". So, the "disconnection" may not be physical

> > > > but a consequence of the code path of the RemoveDevice API. I can

> > > > reproduce it almost 100% of the time with an LE Mouse (tried with MS

> > > > Mouse 3600 and Logitech M355).

> > > > >

> > > > > > 8  0x00000000004a0853 in watch_callback (channel=<optimized out>, cond=<optimized out>, user_data=<optimized out>) at src/shared/io-glib.c:157

> > > > > > 9  0x00007fb3f2d7fe87 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0

> > > > > > 10 0x00007fb3f2d80230 in ?? () from /usr/lib64/libglib-2.0.so.0

> > > > > > 11 0x00007fb3f2d80542 in g_main_loop_run () from /usr/lib64/libglib-2.0.so.0

> > > > > > 12 0x00000000004a0e25 in mainloop_run () at src/shared/mainloop-glib.c:66

> > > > > > 13 0x00000000004a11f2 in mainloop_run_with_signal (func=func@entry=0x43f200 <signal_callback>, user_data=user_data@entry=0x0) at src/shared/mainloop-notify.c:188

> > > > > > 14 0x000000000040c72e in main (argc=<optimized out>, argv=<optimized out>) at src/main.c:959

> > > > > >

> > > > > > Reviewed-By: Archie Pusaka <apusaka@chromium.org>

> > > > > >

> > > > > > ---

> > > > > >  src/device.c             |  3 +++

> > > > > >  src/shared/gatt-client.c | 20 ++++++++++++++++++++

> > > > > >  src/shared/gatt-client.h |  2 ++

> > > > > >  3 files changed, 25 insertions(+)

> > > > > >

> > > > > > diff --git a/src/device.c b/src/device.c

> > > > > > index a5ef46730..9dad746eb 100644

> > > > > > --- a/src/device.c

> > > > > > +++ b/src/device.c

> > > > > > @@ -586,6 +586,9 @@ static void gatt_client_cleanup(struct btd_device *device)

> > > > > >                 device->gatt_ready_id = 0;

> > > > > >         }

> > > > > >

> > > > > > +       /* Make sure that service discovery is stopped if any is in progress */

> > > > > > +       bt_gatt_client_reset_in_discovery(device->client);

> > > > >

> > > > > Does this means that other references will also get interrupted? That

> > > > > is something very weird here since when disconnected the bt_att

> > > > > instance should actually have informed the bt_gatt_client and the

> > > > > cleanup should had happened there.

> > > > What do you mean by "other references"? My guess is that

> > > > bt_gatt_client has done the cleanup, but discovery_op_complete still

> > > > tries to execute the whole code path which involves accessing the

> > > > already cleaned-up gatt_db_attribute objects.

> > >

> > > Here normally what should have happened is:

> > >

> > > bt_gatt_client_unref -> bt_gatt_client_free ->

> > > bt_gatt_client_cancel_all -> bt_gatt_request_unref ->

> > > discovery_op_unref -> discovery_op_free

> > >

> > > So there is something about this code path that is not really working

> > > as intended or perhaps bt_gatt_client_free is never really called? It

> > > would perhaps be worth trying to reproduce this with a a unit test.

> > >

> > > > >

> > > > > > +

> > > > > >         bt_gatt_client_unref(device->client);

> > > > > >         device->client = NULL;

> > > > > >  }

> > > > > > diff --git a/src/shared/gatt-client.c b/src/shared/gatt-client.c

> > > > > > index 8becf1c6c..2ba6efe9a 100644

> > > > > > --- a/src/shared/gatt-client.c

> > > > > > +++ b/src/shared/gatt-client.c

> > > > > > @@ -104,6 +104,9 @@ struct bt_gatt_client {

> > > > > >

> > > > > >         struct bt_gatt_request *discovery_req;

> > > > > >         unsigned int mtu_req_id;

> > > > > > +

> > > > > > +       /* Whether there is a service discovery operation ongoing */

> > > > > > +       bool in_discovery;

> > > > > >  };

> > > > > >

> > > > > >  struct request {

> > > > > > @@ -381,6 +384,12 @@ static void discovery_op_complete(struct discovery_op *op, bool success,

> > > > > >  {

> > > > > >         const struct queue_entry *svc;

> > > > > >

> > > > > > +       /* Service discovery may be reset due to disconnection */

> > > > > > +       if (!op->client->in_discovery)

> > > > > > +               return;

> > > > > > +

> > > > > > +       op->client->in_discovery = false;

> > > > > > +

> > > > > >         op->success = success;

> > > > > >

> > > > > >         /* Read database hash if discovery has been successful */

> > > > > > @@ -1857,6 +1866,9 @@ static void process_service_changed(struct bt_gatt_client *client,

> > > > > >  {

> > > > > >         struct discovery_op *op;

> > > > > >

> > > > > > +       if (client->in_discovery)

> > > > > > +               goto fail;

> > > > > > +

> > > > > >         op = discovery_op_create(client, start_handle, end_handle,

> > > > > >                                                 service_changed_complete,

> > > > > >                                                 service_changed_failure);

> > > > > > @@ -1869,6 +1881,7 @@ static void process_service_changed(struct bt_gatt_client *client,

> > > > > >                                                 discovery_op_ref(op),

> > > > > >                                                 discovery_op_unref);

> > > > > >         if (client->discovery_req) {

> > > > > > +               client->in_discovery = true;

> > > > > >                 client->in_svc_chngd = true;

> > > > > >                 return;

> > > > > >         }

> > > > > > @@ -2057,6 +2070,7 @@ static bool gatt_client_init(struct bt_gatt_client *client, uint16_t mtu)

> > > > > >                 return false;

> > > > > >         }

> > > > > >

> > > > > > +       client->in_discovery = true;

> > > > > >         client->in_init = true;

> > > > > >

> > > > > >         return true;

> > > > > > @@ -2080,6 +2094,7 @@ discover:

> > > > > >         }

> > > > > >

> > > > > >  done:

> > > > > > +       client->in_discovery = true;

> > > > > >         client->in_init = true;

> > > > > >         return true;

> > > > > >  }

> > > > > > @@ -3694,3 +3709,8 @@ int bt_gatt_client_get_security(struct bt_gatt_client *client)

> > > > > >

> > > > > >         return bt_att_get_security(client->att, NULL);

> > > > > >  }

> > > > > > +

> > > > > > +void bt_gatt_client_reset_in_discovery(struct bt_gatt_client *client)

> > > > > > +{

> > > > > > +       client->in_discovery = false;

> > > > > > +}

> > > > > > diff --git a/src/shared/gatt-client.h b/src/shared/gatt-client.h

> > > > > > index dc5102394..dcd8e5cf6 100644

> > > > > > --- a/src/shared/gatt-client.h

> > > > > > +++ b/src/shared/gatt-client.h

> > > > > > @@ -126,3 +126,5 @@ bool bt_gatt_client_unregister_notify(struct bt_gatt_client *client,

> > > > > >

> > > > > >  bool bt_gatt_client_set_security(struct bt_gatt_client *client, int level);

> > > > > >  int bt_gatt_client_get_security(struct bt_gatt_client *client);

> > > > > > +

> > > > > > +void bt_gatt_client_reset_in_discovery(struct bt_gatt_client *client);

> > > > > > --

> > > > > > 2.26.2

> > > > > >

> > > > >

> > > > >

> > > > > --

> > > > > Luiz Augusto von Dentz

> > >

> > >

> > >

> > > --

> > > Luiz Augusto von Dentz

>

>

>

> --

> Luiz Augusto von Dentz
diff mbox series

Patch

diff --git a/src/device.c b/src/device.c
index a5ef46730..9dad746eb 100644
--- a/src/device.c
+++ b/src/device.c
@@ -586,6 +586,9 @@  static void gatt_client_cleanup(struct btd_device *device)
 		device->gatt_ready_id = 0;
 	}
 
+	/* Make sure that service discovery is stopped if any is in progress */
+	bt_gatt_client_reset_in_discovery(device->client);
+
 	bt_gatt_client_unref(device->client);
 	device->client = NULL;
 }
diff --git a/src/shared/gatt-client.c b/src/shared/gatt-client.c
index 8becf1c6c..2ba6efe9a 100644
--- a/src/shared/gatt-client.c
+++ b/src/shared/gatt-client.c
@@ -104,6 +104,9 @@  struct bt_gatt_client {
 
 	struct bt_gatt_request *discovery_req;
 	unsigned int mtu_req_id;
+
+	/* Whether there is a service discovery operation ongoing */
+	bool in_discovery;
 };
 
 struct request {
@@ -381,6 +384,12 @@  static void discovery_op_complete(struct discovery_op *op, bool success,
 {
 	const struct queue_entry *svc;
 
+	/* Service discovery may be reset due to disconnection */
+	if (!op->client->in_discovery)
+		return;
+
+	op->client->in_discovery = false;
+
 	op->success = success;
 
 	/* Read database hash if discovery has been successful */
@@ -1857,6 +1866,9 @@  static void process_service_changed(struct bt_gatt_client *client,
 {
 	struct discovery_op *op;
 
+	if (client->in_discovery)
+		goto fail;
+
 	op = discovery_op_create(client, start_handle, end_handle,
 						service_changed_complete,
 						service_changed_failure);
@@ -1869,6 +1881,7 @@  static void process_service_changed(struct bt_gatt_client *client,
 						discovery_op_ref(op),
 						discovery_op_unref);
 	if (client->discovery_req) {
+		client->in_discovery = true;
 		client->in_svc_chngd = true;
 		return;
 	}
@@ -2057,6 +2070,7 @@  static bool gatt_client_init(struct bt_gatt_client *client, uint16_t mtu)
 		return false;
 	}
 
+	client->in_discovery = true;
 	client->in_init = true;
 
 	return true;
@@ -2080,6 +2094,7 @@  discover:
 	}
 
 done:
+	client->in_discovery = true;
 	client->in_init = true;
 	return true;
 }
@@ -3694,3 +3709,8 @@  int bt_gatt_client_get_security(struct bt_gatt_client *client)
 
 	return bt_att_get_security(client->att, NULL);
 }
+
+void bt_gatt_client_reset_in_discovery(struct bt_gatt_client *client)
+{
+	client->in_discovery = false;
+}
diff --git a/src/shared/gatt-client.h b/src/shared/gatt-client.h
index dc5102394..dcd8e5cf6 100644
--- a/src/shared/gatt-client.h
+++ b/src/shared/gatt-client.h
@@ -126,3 +126,5 @@  bool bt_gatt_client_unregister_notify(struct bt_gatt_client *client,
 
 bool bt_gatt_client_set_security(struct bt_gatt_client *client, int level);
 int bt_gatt_client_get_security(struct bt_gatt_client *client);
+
+void bt_gatt_client_reset_in_discovery(struct bt_gatt_client *client);