
[RFC,03/11] hte: Add tegra194 HTE kernel provider

Message ID 20210625235532.19575-4-dipenp@nvidia.com
State New
Series Intro to Hardware timestamping engine

Commit Message

Dipen Patel June 25, 2021, 11:55 p.m. UTC
The Tegra194 device has multiple HTE instances, also known as GTE
(Generic hardware Timestamping Engine), which can timestamp a subset of
SoC lines/signals. This provider driver focuses on IRQ and GPIO lines
and exposes the timestamping ability on those lines to consumers
through the HTE subsystem.

This patch also adds:
- documentation about this provider and its capabilities at
  Documentation/hte
- compilation support in the Makefile and Kconfig

Signed-off-by: Dipen Patel <dipenp@nvidia.com>
---
 Documentation/hte/index.rst        |  21 ++
 Documentation/hte/tegra194-hte.rst |  65 ++++
 Documentation/index.rst            |   1 +
 drivers/hte/Kconfig                |  12 +
 drivers/hte/Makefile               |   1 +
 drivers/hte/hte-tegra194.c         | 554 +++++++++++++++++++++++++++++
 6 files changed, 654 insertions(+)
 create mode 100644 Documentation/hte/index.rst
 create mode 100644 Documentation/hte/tegra194-hte.rst
 create mode 100644 drivers/hte/hte-tegra194.c

Comments

Kent Gibson July 1, 2021, 2:21 p.m. UTC | #1
On Fri, Jun 25, 2021 at 04:55:24PM -0700, Dipen Patel wrote:
> Tegra194 device has multiple HTE instances also known as GTE

> (Generic hardware Timestamping Engine) which can timestamp subset of

> SoC lines/signals. This provider driver focuses on IRQ and GPIO lines

> and exposes timestamping ability on those lines to the consumers

> through HTE subsystem.

> 

> Also, with this patch, added:

> - documentation about this provider and its capabilities at

> Documentation/hte.

> - Compilation support in Makefile and Kconfig

> 

> Signed-off-by: Dipen Patel <dipenp@nvidia.com>

> ---

>  Documentation/hte/index.rst        |  21 ++

>  Documentation/hte/tegra194-hte.rst |  65 ++++

>  Documentation/index.rst            |   1 +

>  drivers/hte/Kconfig                |  12 +

>  drivers/hte/Makefile               |   1 +

>  drivers/hte/hte-tegra194.c         | 554 +++++++++++++++++++++++++++++

>  6 files changed, 654 insertions(+)

>  create mode 100644 Documentation/hte/index.rst

>  create mode 100644 Documentation/hte/tegra194-hte.rst

>  create mode 100644 drivers/hte/hte-tegra194.c

> 

> diff --git a/Documentation/hte/index.rst b/Documentation/hte/index.rst

> new file mode 100644

> index 000000000000..f311ebec6b47

> --- /dev/null

> +++ b/Documentation/hte/index.rst

> @@ -0,0 +1,21 @@

> +.. SPDX-License-Identifier: GPL-2.0

> +

> +============================================

> +The Linux Hardware Timestamping Engine (HTE)

> +============================================

> +

> +The HTE Subsystem

> +=================

> +

> +.. toctree::

> +   :maxdepth: 1

> +

> +   hte

> +

> +HTE Tegra Provider

> +==================

> +

> +.. toctree::

> +   :maxdepth: 1

> +

> +   tegra194-hte

> \ No newline at end of file

> diff --git a/Documentation/hte/tegra194-hte.rst b/Documentation/hte/tegra194-hte.rst

> new file mode 100644

> index 000000000000..c23eaafcf080

> --- /dev/null

> +++ b/Documentation/hte/tegra194-hte.rst

> @@ -0,0 +1,65 @@

> +HTE Kernel provider driver

> +==========================

> +

> +Description

> +-----------

> +The Nvidia tegra194 chip has many hardware timestamping engine (HTE) instances

> +known as generic timestamping engine (GTE). This provider driver implements

> +two GTE instances 1) GPIO GTE and 2) IRQ GTE. The both GTEs instances get the

> +timestamp from the system counter TSC which has 31.25MHz clock rate, and the

> +driver converts clock tick rate to nano seconds before storing it as timestamp

> +value.

> +

> +GPIO GTE

> +--------

> +

> +This GTE instance help timestamps GPIO in real time, for that to happen GPIO

> +needs to be configured as input and IRQ needs to ba enabled as well. The only

> +always on (AON) gpio controller instance supports timestamping GPIOs in

> +realtime and it has 39 GPIO lines. There is also a dependency on AON GPIO

> +controller as it requires very specific bits to be set in GPIO config register.

> +It in a way creates cyclic dependency between GTE and GPIO controller. The GTE

> +GPIO functionality is accessed from the GPIOLIB. It can support both the in

> +kernel and userspace consumers. In the later case, requests go through GPIOLIB

> +CDEV framework. The below APIs are added in GPIOLIB framework to access HTE

> +subsystem and GPIO GTE for in kernel consumers.

> +

> +.. c:function:: int gpiod_hw_timestamp_control( struct gpio_desc *desc, bool enable )

> +

> +	To enable HTE on given GPIO line.

> +

> +.. c:function:: u64 gpiod_get_hw_timestamp( struct gpio_desc *desc, bool block )

> +

> +	To retrieve hardwre timestamp in nano seconds.

> +

> +.. c:function:: bool gpiod_is_hw_timestamp_enabled( const struct gpio_desc *desc )

> +

> +	To query if HTE is enabled on the given GPIO.

> +

> +There is hte-tegra194-gpio-test.c, located in ``drivers/hte/`` directory, test

> +driver which demonstrates above APIs for the Jetson AGX platform. For userspace

> +consumers, GPIO_V2_LINE_FLAG_EVENT_CLOCK_HARDWARE flag must be specifed during

> +IOCTL calls, refer ``tools/gpio/gpio-event-mon.c``, which returns the timestamp

> +in nano second.

> +

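For reference - going purely by the prototypes quoted above, and untested -
an in-kernel consumer would presumably use these roughly as follows (the
"hte-test" connection ID and the surrounding error handling are made up for
illustration):

	struct gpio_desc *desc;
	u64 ts;
	int ret;

	/* per the text above, the line must be an input with its IRQ enabled */
	desc = gpiod_get(dev, "hte-test", GPIOD_IN);
	if (IS_ERR(desc))
		return PTR_ERR(desc);

	ret = gpiod_hw_timestamp_control(desc, true);
	if (ret)
		return ret;

	if (gpiod_is_hw_timestamp_enabled(desc))
		ts = gpiod_get_hw_timestamp(desc, true);	/* blocking, in ns */

	gpiod_hw_timestamp_control(desc, false);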

<snip>

> +

> +static void tegra_hte_read_fifo(struct tegra_hte_soc *gs)

> +{

> +	u32 tsh, tsl, src, pv, cv, acv, slice, bit_index, line_id;

> +	u64 tsc;

> +	int dir;

> +	struct hte_ts_data el;

> +

> +	while ((tegra_hte_readl(gs, HTE_TESTATUS) >>

> +		HTE_TESTATUS_OCCUPANCY_SHIFT) &

> +		HTE_TESTATUS_OCCUPANCY_MASK) {

> +		tsh = tegra_hte_readl(gs, HTE_TETSCH);

> +		tsl = tegra_hte_readl(gs, HTE_TETSCL);

> +		tsc = (((u64)tsh << 32) | tsl);

> +

> +		src = tegra_hte_readl(gs, HTE_TESRC);

> +		slice = (src >> HTE_TESRC_SLICE_SHIFT) &

> +			    HTE_TESRC_SLICE_DEFAULT_MASK;

> +

> +		pv = tegra_hte_readl(gs, HTE_TEPCV);

> +		cv = tegra_hte_readl(gs, HTE_TECCV);

> +		acv = pv ^ cv;

> +		while (acv) {

> +			bit_index = __builtin_ctz(acv);

> +			if ((pv >> bit_index) & BIT(0))

> +				dir = HTE_EVENT_RISING_EDGE;

> +			else

> +				dir = HTE_EVENT_FALLING_EDGE;

> +

> +			line_id = bit_index + (slice << 5);

> +			el.dir = dir;

> +			el.tsc = tsc << HTE_TS_NS_SHIFT;

> +			hte_push_ts_ns_atomic(gs->chip, line_id, &el,

> +					      sizeof(el));

> +			acv &= ~BIT(bit_index);

> +		}

> +		tegra_hte_writel(gs, HTE_TECMD, HTE_TECMD_CMD_POP);

> +	}

> +}


What happens when the hte_push_ts_ns_atomic() fails?
The timestamp will be quietly dropped?
What happens when the interrupt corresponding to that dropped timestamp
asks for it?  The irq handler thread will block until it can get a
timestamp from the subsequent interrupt?

Which brings me back to the concern I have with the approach used in
the hte/gpiolib integration - how do you guarantee that the timestamp
returned by gpiod_get_hw_timestamp() corresponds to the irq interrupt
being handled, particularly in the face of errors such as:
 - overflows of the timestamp FIFO in the chip
 - overflows of software FIFOs as here
 - lost interrupts (if the hw generates interrupts faster than the CPU
   can service them)
?

Cheers,
Kent.
Jonathan Cameron July 4, 2021, 8:27 p.m. UTC | #2
On Fri, 25 Jun 2021 16:55:24 -0700
Dipen Patel <dipenp@nvidia.com> wrote:

> Tegra194 device has multiple HTE instances also known as GTE

> (Generic hardware Timestamping Engine) which can timestamp subset of

> SoC lines/signals. This provider driver focuses on IRQ and GPIO lines

> and exposes timestamping ability on those lines to the consumers

> through HTE subsystem.

> 

> Also, with this patch, added:

> - documentation about this provider and its capabilities at

> Documentation/hte.

> - Compilation support in Makefile and Kconfig

> 

> Signed-off-by: Dipen Patel <dipenp@nvidia.com>


A few comments inline,

J
> ---

>  Documentation/hte/index.rst        |  21 ++

>  Documentation/hte/tegra194-hte.rst |  65 ++++

>  Documentation/index.rst            |   1 +

>  drivers/hte/Kconfig                |  12 +

>  drivers/hte/Makefile               |   1 +

>  drivers/hte/hte-tegra194.c         | 554 +++++++++++++++++++++++++++++

>  6 files changed, 654 insertions(+)

>  create mode 100644 Documentation/hte/index.rst

>  create mode 100644 Documentation/hte/tegra194-hte.rst

>  create mode 100644 drivers/hte/hte-tegra194.c

> 

> diff --git a/Documentation/hte/index.rst b/Documentation/hte/index.rst

> new file mode 100644

> index 000000000000..f311ebec6b47

> --- /dev/null

> +++ b/Documentation/hte/index.rst

> @@ -0,0 +1,21 @@

> +.. SPDX-License-Identifier: GPL-2.0

> +

> +============================================

> +The Linux Hardware Timestamping Engine (HTE)

> +============================================

> +

> +The HTE Subsystem

> +=================

> +

> +.. toctree::

> +   :maxdepth: 1

> +

> +   hte

> +

> +HTE Tegra Provider

> +==================

> +

> +.. toctree::

> +   :maxdepth: 1

> +

> +   tegra194-hte

> \ No newline at end of file

> diff --git a/Documentation/hte/tegra194-hte.rst b/Documentation/hte/tegra194-hte.rst

> new file mode 100644

> index 000000000000..c23eaafcf080

> --- /dev/null

> +++ b/Documentation/hte/tegra194-hte.rst

> @@ -0,0 +1,65 @@

> +HTE Kernel provider driver

> +==========================

> +

> +Description

> +-----------

> +The Nvidia tegra194 chip has many hardware timestamping engine (HTE) instances

> +known as generic timestamping engine (GTE). This provider driver implements

> +two GTE instances 1) GPIO GTE and 2) IRQ GTE. The both GTEs instances get the

> +timestamp from the system counter TSC which has 31.25MHz clock rate, and the

> +driver converts clock tick rate to nano seconds before storing it as timestamp

> +value.

> +

> +GPIO GTE

> +--------

> +

> +This GTE instance help timestamps GPIO in real time, for that to happen GPIO

> +needs to be configured as input and IRQ needs to ba enabled as well. The only

> +always on (AON) gpio controller instance supports timestamping GPIOs in

> +realtime and it has 39 GPIO lines. There is also a dependency on AON GPIO

> +controller as it requires very specific bits to be set in GPIO config register.

> +It in a way creates cyclic dependency between GTE and GPIO controller. The GTE

> +GPIO functionality is accessed from the GPIOLIB. It can support both the in

> +kernel and userspace consumers. In the later case, requests go through GPIOLIB

> +CDEV framework. The below APIs are added in GPIOLIB framework to access HTE

> +subsystem and GPIO GTE for in kernel consumers.

> +

> +.. c:function:: int gpiod_hw_timestamp_control( struct gpio_desc *desc, bool enable )

> +

> +	To enable HTE on given GPIO line.

> +

> +.. c:function:: u64 gpiod_get_hw_timestamp( struct gpio_desc *desc, bool block )

> +

> +	To retrieve hardwre timestamp in nano seconds.

> +

> +.. c:function:: bool gpiod_is_hw_timestamp_enabled( const struct gpio_desc *desc )

> +

> +	To query if HTE is enabled on the given GPIO.

> +

> +There is hte-tegra194-gpio-test.c, located in ``drivers/hte/`` directory, test

> +driver which demonstrates above APIs for the Jetson AGX platform. For userspace

> +consumers, GPIO_V2_LINE_FLAG_EVENT_CLOCK_HARDWARE flag must be specifed during

> +IOCTL calls, refer ``tools/gpio/gpio-event-mon.c``, which returns the timestamp

> +in nano second.

> +

> +IRQ GTE

> +--------

> +

> +This GTE instance helps timestamp LIC IRQ lines in real time. There are 352 IRQ

> +lines which this instance can help timestamp realtime. The hte devicetree

> +binding described at ``Documentation/devicetree/bindings/hte/`` gives out

> +example how consumer can request IRQ line, since it is one to one mapping,

> +consumers can simply specify IRQ number that they are interested in. There is

> +no userspace consumer support for this GTE instance. The sample test code

> +hte-tegra194-irq-test.c, located in ``drivers/hte/`` directory,

> +demonstrates how to use IRQ GTE instance. The below is sample device tree

> +snippet code for the test driver::

> +

> + tegra_hte_irq_test {

> +        compatible = "nvidia,tegra194-hte-irq-test";

> +        htes = <&tegra_hte_lic 0x19>;

> +        hte-names = "hte-lic";

> + };

> +

> +The source code of the driver both IRQ and GPIO GTE is locate at

> +``drivers/hte/hte-tegra194.c``.

> \ No newline at end of file

> diff --git a/Documentation/index.rst b/Documentation/index.rst

> index 1b13c2445e87..b41118577fe6 100644

> --- a/Documentation/index.rst

> +++ b/Documentation/index.rst

> @@ -138,6 +138,7 @@ needed).

>     misc-devices/index

>     scheduler/index

>     mhi/index

> +   hte/index

>  

>  Architecture-agnostic documentation

>  -----------------------------------

> diff --git a/drivers/hte/Kconfig b/drivers/hte/Kconfig

> index 394e112f7dfb..f7b01fcc7190 100644

> --- a/drivers/hte/Kconfig

> +++ b/drivers/hte/Kconfig

> @@ -20,3 +20,15 @@ menuconfig HTE

>  

>            If unsure, say no.

>  

> +if HTE

> +

> +config HTE_TEGRA194

> +	tristate "NVIDIA Tegra194 HTE Support"

> +	depends on ARCH_TEGRA_194_SOC

> +	help

> +	  Enable this option for integrated hardware timestamping engine also

> +	  known as generic timestamping engine (GTE) support on NVIDIA Tegra194

> +	  systems-on-chip. The driver supports 352 LIC IRQs and 39 AON GPIOs

> +	  lines for timestamping in realtime.

> +

> +endif

> diff --git a/drivers/hte/Makefile b/drivers/hte/Makefile

> index 9899dbe516f7..52f978cfc913 100644

> --- a/drivers/hte/Makefile

> +++ b/drivers/hte/Makefile

> @@ -1 +1,2 @@

>  obj-$(CONFIG_HTE)		+= hte.o

> +obj-$(CONFIG_HTE_TEGRA194)	+= hte-tegra194.o

> \ No newline at end of file


fix that

> diff --git a/drivers/hte/hte-tegra194.c b/drivers/hte/hte-tegra194.c

> new file mode 100644

> index 000000000000..8ad10efd3641

> --- /dev/null

> +++ b/drivers/hte/hte-tegra194.c

> @@ -0,0 +1,554 @@

> +// SPDX-License-Identifier: GPL-2.0

> +/*

> + * Copyright (c) 2021 NVIDIA Corporation

> + *

> + * Author: Dipen Patel <dipenp@nvidia.com>

> + */

> +

> +#include <linux/err.h>

> +#include <linux/io.h>

> +#include <linux/module.h>

> +#include <linux/slab.h>

> +#include <linux/stat.h>

> +#include <linux/interrupt.h>

> +#include <linux/of.h>

> +#include <linux/of_device.h>

> +#include <linux/platform_device.h>

> +#include <linux/hte.h>

> +#include <linux/uaccess.h>

> +

> +#define HTE_SUSPEND	0

> +

> +/* HTE source clock TSC is 31.25MHz */

> +#define HTE_TS_CLK_RATE_HZ	31250000ULL

> +#define HTE_CLK_RATE_NS		32

> +#define HTE_TS_NS_SHIFT	__builtin_ctz(HTE_CLK_RATE_NS)

> +

> +#define NV_AON_SLICE_INVALID	-1

> +

> +/* AON HTE line map For slice 1 */

> +#define NV_AON_HTE_SLICE1_IRQ_GPIO_28	12

> +#define NV_AON_HTE_SLICE1_IRQ_GPIO_29	13

> +

> +/* AON HTE line map For slice 2 */

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_0	0

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_1	1

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_2	2

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_3	3

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_4	4

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_5	5

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_6	6

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_7	7

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_8	8

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_9	9

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_10	10

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_11	11

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_12	12

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_13	13

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_14	14

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_15	15

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_16	16

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_17	17

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_18	18

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_19	19

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_20	20

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_21	21

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_22	22

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_23	23

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_24	24

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_25	25

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_26	26

> +#define NV_AON_HTE_SLICE2_IRQ_GPIO_27	27

> +

> +/* AON GPIO port AA pins */

> +#define NV_AON_GPIO_PORT_AA_0		0

> +#define NV_AON_GPIO_PORT_AA_1		1

> +#define NV_AON_GPIO_PORT_AA_2		2

> +#define NV_AON_GPIO_PORT_AA_3		3

> +#define NV_AON_GPIO_PORT_AA_4		4

> +#define NV_AON_GPIO_PORT_AA_5		5

> +#define NV_AON_GPIO_PORT_AA_6		6

> +#define NV_AON_GPIO_PORT_AA_7		7

> +

> +/* AON GPIO port BB pins */

> +#define NV_AON_GPIO_PORT_BB_0		8

> +#define NV_AON_GPIO_PORT_BB_1		9

> +#define NV_AON_GPIO_PORT_BB_2		10

> +#define NV_AON_GPIO_PORT_BB_3		11

> +

> +/* AON GPIO port CC pins */

> +#define NV_AON_GPIO_PORT_CC_0		16

> +#define NV_AON_GPIO_PORT_CC_1		17

> +#define NV_AON_GPIO_PORT_CC_2		18

> +#define NV_AON_GPIO_PORT_CC_3		19

> +#define NV_AON_GPIO_PORT_CC_4		20

> +#define NV_AON_GPIO_PORT_CC_5		21

> +#define NV_AON_GPIO_PORT_CC_6		22

> +#define NV_AON_GPIO_PORT_CC_7		23

> +

> +/* AON GPIO port DD pins */

> +#define NV_AON_GPIO_PORT_DD_0		24

> +#define NV_AON_GPIO_PORT_DD_1		25

> +#define NV_AON_GPIO_PORT_DD_2		26

> +

> +/* AON GPIO port EE pins */

> +#define NV_AON_GPIO_PORT_EE_0		32

> +#define NV_AON_GPIO_PORT_EE_1		33

> +#define NV_AON_GPIO_PORT_EE_2		34

> +#define NV_AON_GPIO_PORT_EE_3		35

> +#define NV_AON_GPIO_PORT_EE_4		36

> +#define NV_AON_GPIO_PORT_EE_5		37

> +#define NV_AON_GPIO_PORT_EE_6		38

> +

> +

> +#define HTE_TECTRL		0x0

> +#define HTE_TETSCH		0x4

> +#define HTE_TETSCL		0x8

> +#define HTE_TESRC		0xC

> +#define HTE_TECCV		0x10

> +#define HTE_TEPCV		0x14

> +#define HTE_TECMD		0x1C

> +#define HTE_TESTATUS		0x20

> +#define HTE_SLICE0_TETEN	0x40

> +#define HTE_SLICE1_TETEN	0x60

> +

> +#define HTE_SLICE_SIZE		(HTE_SLICE1_TETEN - HTE_SLICE0_TETEN)

> +

> +#define HTE_TECTRL_ENABLE_ENABLE	0x1

> +

> +#define HTE_TECTRL_OCCU_SHIFT		0x8

> +#define HTE_TECTRL_INTR_SHIFT		0x1

> +#define HTE_TECTRL_INTR_ENABLE		0x1

> +

> +#define HTE_TESRC_SLICE_SHIFT		16

> +#define HTE_TESRC_SLICE_DEFAULT_MASK	0xFF

> +

> +#define HTE_TECMD_CMD_POP		0x1

> +

> +#define HTE_TESTATUS_OCCUPANCY_SHIFT	8

> +#define HTE_TESTATUS_OCCUPANCY_MASK	0xFF

> +

> +struct hte_slices {

> +	u32 r_val;

> +	unsigned long flags;

> +	/* to prevent lines mapped to same slice updating its register */

> +	spinlock_t s_lock;

> +};

> +

> +struct tegra_hte_line_mapped {

> +	int slice;

> +	u32 bit_index;

> +};

> +

> +struct tegra_hte_line_table {

> +	int map_sz;

> +	const struct tegra_hte_line_mapped *map;

> +};

> +

> +struct tegra_hte_soc {

> +	int hte_irq;

> +	u32 itr_thrshld;

> +	u32 conf_rval;

> +	struct hte_slices *sl;

> +	const struct tegra_hte_line_table *line_map;

> +	struct hte_chip *chip;

> +	void __iomem *regs;

> +};

> +

> +static const struct tegra_hte_line_mapped tegra194_aon_gpio_map[] = {

> +	/* gpio, slice, bit_index */

> +	[NV_AON_GPIO_PORT_AA_0]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_11},

> +	[NV_AON_GPIO_PORT_AA_1]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_10},

> +	[NV_AON_GPIO_PORT_AA_2]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_9},

> +	[NV_AON_GPIO_PORT_AA_3]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_8},

> +	[NV_AON_GPIO_PORT_AA_4]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_7},

> +	[NV_AON_GPIO_PORT_AA_5]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_6},

> +	[NV_AON_GPIO_PORT_AA_6]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_5},

> +	[NV_AON_GPIO_PORT_AA_7]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_4},

> +	[NV_AON_GPIO_PORT_BB_0]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_3},

> +	[NV_AON_GPIO_PORT_BB_1]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_2},

> +	[NV_AON_GPIO_PORT_BB_2] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_1},

> +	[NV_AON_GPIO_PORT_BB_3] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_0},

> +	[12] = {NV_AON_SLICE_INVALID, 0},

> +	[13] = {NV_AON_SLICE_INVALID, 0},

> +	[14] = {NV_AON_SLICE_INVALID, 0},

> +	[15] = {NV_AON_SLICE_INVALID, 0},

> +	[NV_AON_GPIO_PORT_CC_0] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_22},

> +	[NV_AON_GPIO_PORT_CC_1] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_21},

> +	[NV_AON_GPIO_PORT_CC_2] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_20},

> +	[NV_AON_GPIO_PORT_CC_3] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_19},

> +	[NV_AON_GPIO_PORT_CC_4] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_18},

> +	[NV_AON_GPIO_PORT_CC_5] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_17},

> +	[NV_AON_GPIO_PORT_CC_6] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_16},

> +	[NV_AON_GPIO_PORT_CC_7] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_15},

> +	[NV_AON_GPIO_PORT_DD_0] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_14},

> +	[NV_AON_GPIO_PORT_DD_1] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_13},

> +	[NV_AON_GPIO_PORT_DD_2] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_12},

> +	[27] = {NV_AON_SLICE_INVALID, 0},

> +	[28] = {NV_AON_SLICE_INVALID, 0},

> +	[29] = {NV_AON_SLICE_INVALID, 0},

> +	[30] = {NV_AON_SLICE_INVALID, 0},

> +	[31] = {NV_AON_SLICE_INVALID, 0},

> +	[NV_AON_GPIO_PORT_EE_0] = {1, NV_AON_HTE_SLICE1_IRQ_GPIO_29},

> +	[NV_AON_GPIO_PORT_EE_1] = {1, NV_AON_HTE_SLICE1_IRQ_GPIO_28},

> +	[NV_AON_GPIO_PORT_EE_2] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_27},

> +	[NV_AON_GPIO_PORT_EE_3] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_26},

> +	[NV_AON_GPIO_PORT_EE_4] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_25},

> +	[NV_AON_GPIO_PORT_EE_5] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_24},

> +	[NV_AON_GPIO_PORT_EE_6] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_23},

> +};

> +

> +static const struct tegra_hte_line_table aon_hte_map = {

> +	.map_sz = ARRAY_SIZE(tegra194_aon_gpio_map),

> +	.map = tegra194_aon_gpio_map,

> +};

> +

> +static inline u32 tegra_hte_readl(struct tegra_hte_soc *hte, u32 reg)

> +{

> +	return readl(hte->regs + reg);

> +}

> +

> +static inline void tegra_hte_writel(struct tegra_hte_soc *hte, u32 reg,

> +				    u32 val)

> +{

> +	writel(val, hte->regs + reg);

> +}

> +

> +static inline int tegra_hte_map_to_line_id(u32 eid, struct tegra_hte_soc *gs,

> +					  u32 *mapped)

> +{

> +	const struct tegra_hte_line_mapped *m;

> +

> +	if (gs->line_map) {

> +		m = gs->line_map->map;

> +		if (eid > gs->line_map->map_sz)

> +			return -EINVAL;

> +		if (m[eid].slice == NV_AON_SLICE_INVALID)

> +			return -EINVAL;

> +

> +		*mapped = (m[eid].slice << 5) + m[eid].bit_index;

> +	} else {

> +		*mapped = eid;

> +	}

> +

> +	return 0;

> +}

> +

> +static int tegra_hte_line_xlate(struct hte_chip *gc,

> +				 const struct of_phandle_args *args,

> +				 struct hte_ts_desc *desc, u32 *xlated_id)

> +{

> +	int ret = 0;

> +

> +	if (!gc || !desc || !xlated_id)

> +		return -EINVAL;

> +

> +	if (args) {

> +		if (gc->of_hte_n_cells < 1)

> +			return -EINVAL;

> +

> +		if (args->args_count != gc->of_hte_n_cells)

> +			return -EINVAL;

> +

> +		desc->con_id = args->args[0];

> +	}

> +

> +	ret = tegra_hte_map_to_line_id(desc->con_id, gc->data,

> +				       xlated_id);

> +	if (ret < 0) {

> +		dev_dbg(gc->dev, "con_id:%u mapping failed\n",

> +			desc->con_id);

> +		return ret;

> +	}

> +

> +	if (*xlated_id > gc->nlines)

> +		return -EINVAL;

> +

> +	dev_dbg(gc->dev, "requested id:%u, xlated id:%u\n",

> +		desc->con_id, *xlated_id);

> +

> +	return 0;

> +}

> +

> +static int tegra_hte_en_dis_common(struct hte_chip *chip, u32 line_id, bool en)

> +{

> +	u32 slice, sl_bit_shift, line_bit, val, reg;

> +	struct tegra_hte_soc *gs;

> +

> +	sl_bit_shift = __builtin_ctz(HTE_SLICE_SIZE);

> +

> +	if (!chip)

> +		return -EINVAL;

> +

> +	gs = (struct tegra_hte_soc *)chip->data;

> +

> +	if (line_id > chip->nlines) {

> +		dev_err(chip->dev,

> +			"line id: %u is not supported by this controller\n",

> +			line_id);

> +		return -EINVAL;

> +	}

> +

> +	slice = line_id >> sl_bit_shift;

> +	line_bit = line_id & (HTE_SLICE_SIZE - 1);

> +	reg = (slice << sl_bit_shift) + HTE_SLICE0_TETEN;

> +

> +	spin_lock(&gs->sl[slice].s_lock);

> +

> +	if (test_bit(HTE_SUSPEND, &gs->sl[slice].flags)) {

> +		spin_unlock(&gs->sl[slice].s_lock);

> +		dev_dbg(chip->dev, "device suspended");

> +		return -EBUSY;

> +	}

> +

> +	val = tegra_hte_readl(gs, reg);

> +	if (en)

> +		val = val | (1 << line_bit);

> +	else

> +		val = val & (~(1 << line_bit));

> +	tegra_hte_writel(gs, reg, val);

> +

> +	spin_unlock(&gs->sl[slice].s_lock);

> +

> +	dev_dbg(chip->dev, "line: %u, slice %u, line_bit %u, reg:0x%x\n",

> +		line_id, slice, line_bit, reg);

> +

> +	return 0;

> +}

> +

> +static int tegra_hte_request(struct hte_chip *chip, u32 line_id)

> +{

> +	return tegra_hte_en_dis_common(chip, line_id, true);

> +}

> +

> +static int tegra_hte_release(struct hte_chip *chip, u32 line_id)

> +{

> +	return tegra_hte_en_dis_common(chip, line_id, false);

> +}

> +

> +static int tegra_hte_clk_src_info(struct hte_chip *chip,

> +				  struct hte_clk_info *ci)

> +{

> +	(void)chip;

> +

> +	ci->hz = HTE_TS_CLK_RATE_HZ;

> +	ci->type = CLOCK_MONOTONIC;

> +

> +	return 0;

> +}

> +

> +static void tegra_hte_read_fifo(struct tegra_hte_soc *gs)

> +{

> +	u32 tsh, tsl, src, pv, cv, acv, slice, bit_index, line_id;

> +	u64 tsc;

> +	int dir;

> +	struct hte_ts_data el;

> +

> +	while ((tegra_hte_readl(gs, HTE_TESTATUS) >>

> +		HTE_TESTATUS_OCCUPANCY_SHIFT) &

> +		HTE_TESTATUS_OCCUPANCY_MASK) {

> +		tsh = tegra_hte_readl(gs, HTE_TETSCH);

> +		tsl = tegra_hte_readl(gs, HTE_TETSCL);

> +		tsc = (((u64)tsh << 32) | tsl);

> +

> +		src = tegra_hte_readl(gs, HTE_TESRC);

> +		slice = (src >> HTE_TESRC_SLICE_SHIFT) &

> +			    HTE_TESRC_SLICE_DEFAULT_MASK;

> +

> +		pv = tegra_hte_readl(gs, HTE_TEPCV);

> +		cv = tegra_hte_readl(gs, HTE_TECCV);

> +		acv = pv ^ cv;

> +		while (acv) {

> +			bit_index = __builtin_ctz(acv);

> +			if ((pv >> bit_index) & BIT(0))

> +				dir = HTE_EVENT_RISING_EDGE;

> +			else

> +				dir = HTE_EVENT_FALLING_EDGE;

> +

> +			line_id = bit_index + (slice << 5);

> +			el.dir = dir;

> +			el.tsc = tsc << HTE_TS_NS_SHIFT;

> +			hte_push_ts_ns_atomic(gs->chip, line_id, &el,

> +					      sizeof(el));

> +			acv &= ~BIT(bit_index);

> +		}

> +		tegra_hte_writel(gs, HTE_TECMD, HTE_TECMD_CMD_POP);

> +	}

> +}

> +

> +static irqreturn_t tegra_hte_isr(int irq, void *dev_id)

> +{

> +	struct tegra_hte_soc *gs = dev_id;

> +

> +	tegra_hte_read_fifo(gs);

> +

> +	return IRQ_HANDLED;

> +}

> +

> +static const struct of_device_id tegra_hte_of_match[] = {

> +	{ .compatible = "nvidia,tegra194-gte-lic"},

> +	{ .compatible = "nvidia,tegra194-gte-aon", .data = &aon_hte_map},

> +	{ }

> +};

> +MODULE_DEVICE_TABLE(of, tegra_hte_of_match);

> +

> +static const struct hte_ops g_ops = {

> +	.request = tegra_hte_request,

> +	.release = tegra_hte_release,

> +	.enable = tegra_hte_request,

> +	.disable = tegra_hte_release,

> +	.get_clk_src_info = tegra_hte_clk_src_info,

> +};

> +

> +static int tegra_hte_probe(struct platform_device *pdev)

> +{

> +	int ret;

> +	u32 i, slices, val = 0;

> +	struct device *dev;

> +	struct tegra_hte_soc *hte_dev;

> +	struct hte_chip *gc;

> +

> +	dev = &pdev->dev;

> +

> +	hte_dev = devm_kzalloc(dev, sizeof(*hte_dev), GFP_KERNEL);

> +	if (!hte_dev)

> +		return -ENOMEM;

> +

> +	gc = devm_kzalloc(dev, sizeof(*gc), GFP_KERNEL);

> +	if (!gc)

> +		return -ENOMEM;

> +

> +	dev_set_drvdata(&pdev->dev, hte_dev);

> +	hte_dev->line_map = of_device_get_match_data(&pdev->dev);

> +

> +	hte_dev->regs = devm_platform_ioremap_resource(pdev, 0);

> +	if (IS_ERR(hte_dev->regs))

> +		return PTR_ERR(hte_dev->regs);

> +

> +	ret = of_property_read_u32(dev->of_node, "int-threshold",

> +				   &hte_dev->itr_thrshld);

> +	if (ret != 0)

> +		hte_dev->itr_thrshld = 1;

> +

> +	ret = of_property_read_u32(dev->of_node, "slices", &slices);

> +	if (ret != 0) {

> +		dev_err(dev, "Could not read slices\n");

> +		return -EINVAL;

> +	}

> +

> +	hte_dev->sl = devm_kzalloc(dev, sizeof(struct hte_slices) * slices,


Preference for sizeof(*hte_dev->sl) as it saves me checking the size is correct
for the type.
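
i.e. something like:

	hte_dev->sl = devm_kzalloc(dev, sizeof(*hte_dev->sl) * slices,
				   GFP_KERNEL);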

> +				   GFP_KERNEL);

> +	if (!hte_dev->sl)

> +		return -ENOMEM;

> +

> +	ret = platform_get_irq(pdev, 0);

> +	if (ret < 0) {

> +		dev_err(dev, "get irq failed.\n");


dev_err_probe() probably so you don't print the message if deferred probing is
going on.
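
i.e. something like (untested):

	ret = platform_get_irq(pdev, 0);
	if (ret < 0)
		return dev_err_probe(dev, ret, "get irq failed\n");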

> +		return ret;

> +	}

> +	hte_dev->hte_irq = ret;

> +	ret = devm_request_irq(dev, hte_dev->hte_irq, tegra_hte_isr, 0,

> +			       dev_name(dev), hte_dev);

> +	if (ret < 0) {

> +		dev_err(dev, "request irq failed.\n");

> +		return ret;

> +	}

> +

> +	gc->nlines = slices << 5;

> +	gc->ops = &g_ops;

> +	gc->dev = dev;

> +	hte_dev->chip = gc;

> +	gc->data = (void *)hte_dev;


Don't cast to void * - cast to the actual type if necessary.
If it is a void * then most likely it shouldn't be, given we always put
one particular thing in it.

> +	gc->xlate = tegra_hte_line_xlate;

> +	gc->of_hte_n_cells = 1;

> +

> +	ret = hte_register_chip(hte_dev->chip);

> +


No blank line before error handler.  Under the circumstances is that not
fatal?
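
i.e. I would expect something like (untested):

	ret = hte_register_chip(hte_dev->chip);
	if (ret) {
		dev_err(gc->dev, "hte chip register failed\n");
		return ret;
	}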

> +	if (ret)

> +		dev_err(gc->dev, "hte chip register failed");

> +

> +	for (i = 0; i < slices; i++) {

> +		hte_dev->sl[i].flags = 0;

> +		spin_lock_init(&hte_dev->sl[i].s_lock);

> +	}

> +

> +	val = HTE_TECTRL_ENABLE_ENABLE |

> +	      (HTE_TECTRL_INTR_ENABLE << HTE_TECTRL_INTR_SHIFT) |

> +	      (hte_dev->itr_thrshld << HTE_TECTRL_OCCU_SHIFT);

> +	tegra_hte_writel(hte_dev, HTE_TECTRL, val);


You could use a devm_add_action_or_reset() to deal with unwinding this
plus add a devm_hte_register_chip() and then you can get rid of remove
entirely which is always nice.
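
Roughly (untested; devm_hte_register_chip() would be a new helper to add to
the HTE core, and the action name below is made up):

	static void tegra_hte_hw_disable(void *data)
	{
		struct tegra_hte_soc *hte_dev = data;

		tegra_hte_writel(hte_dev, HTE_TECTRL, 0);
	}

and in probe, after writing HTE_TECTRL:

	ret = devm_add_action_or_reset(dev, tegra_hte_hw_disable, hte_dev);
	if (ret)
		return ret;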

> +

> +	dev_dbg(gc->dev, "lines: %d, slices:%d", gc->nlines, slices);

> +	return 0;

> +}

> +

> +static int tegra_hte_remove(struct platform_device *pdev)

> +{

> +	struct tegra_hte_soc *gs = dev_get_drvdata(&pdev->dev);

> +

> +	tegra_hte_writel(gs, HTE_TECTRL, 0);

> +

> +	return hte_unregister_chip(gs->chip);

> +}

> +

> +#ifdef CONFIG_PM_SLEEP


Personally I prefer the approach of marking PM functions
__maybe_unused and dropping the ifdef protections.
There have been a lot of subtle issues in the build system in the
past around those and it's much easier to just let the compiler
drop them if they are unused.
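
i.e. roughly (bodies unchanged):

	static int __maybe_unused tegra_hte_resume_early(struct device *dev)
	{
		/* ... */
	}

	static int __maybe_unused tegra_hte_suspend_late(struct device *dev)
	{
		/* ... */
	}

with the #ifdef CONFIG_PM_SLEEP/#endif pair dropped;
SET_LATE_SYSTEM_SLEEP_PM_OPS() already expands to nothing when
CONFIG_PM_SLEEP is not set, so the compiler just discards the functions.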

> +static int tegra_hte_resume_early(struct device *dev)

> +{

> +	u32 i;

> +	struct tegra_hte_soc *gs = dev_get_drvdata(dev);

> +	u32 slices = gs->chip->nlines >> 5;


Whilst it's the same thing, I'd prefer a divide there by however many lines there are in a slice.
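
e.g. (the define name here is made up):

	#define HTE_NLINES_PER_SLICE	32

	u32 slices = gs->chip->nlines / HTE_NLINES_PER_SLICE;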

> +	u32 sl_bit_shift = __builtin_ctz(HTE_SLICE_SIZE);

> +

> +	tegra_hte_writel(gs, HTE_TECTRL, gs->conf_rval);

> +

> +	for (i = 0; i < slices; i++) {

> +		spin_lock(&gs->sl[i].s_lock);

> +		tegra_hte_writel(gs,

> +				 ((i << sl_bit_shift) + HTE_SLICE0_TETEN),

> +				 gs->sl[i].r_val);

> +		clear_bit(HTE_SUSPEND, &gs->sl[i].flags);

> +		spin_unlock(&gs->sl[i].s_lock);

> +	}

> +

> +	return 0;

> +}

> +

> +static int tegra_hte_suspend_late(struct device *dev)

> +{

> +	u32 i;

> +	struct tegra_hte_soc *gs = dev_get_drvdata(dev);

> +	u32 slices = gs->chip->nlines >> 5;

> +	u32 sl_bit_shift = __builtin_ctz(HTE_SLICE_SIZE);

> +

> +	gs->conf_rval = tegra_hte_readl(gs, HTE_TECTRL);

> +	for (i = 0; i < slices; i++) {

> +		spin_lock(&gs->sl[i].s_lock);

> +		gs->sl[i].r_val = tegra_hte_readl(gs,

> +				((i << sl_bit_shift) + HTE_SLICE0_TETEN));

> +		set_bit(HTE_SUSPEND, &gs->sl[i].flags);

> +		spin_unlock(&gs->sl[i].s_lock);

> +	}

> +

> +	return 0;

> +}

> +#endif

> +

> +static const struct dev_pm_ops tegra_hte_pm = {

> +	SET_LATE_SYSTEM_SLEEP_PM_OPS(tegra_hte_suspend_late,

> +				     tegra_hte_resume_early)

> +};

> +

> +static struct platform_driver tegra_hte_driver = {

> +	.probe = tegra_hte_probe,

> +	.remove = tegra_hte_remove,

> +	.driver = {

> +		.name = "tegra_hte",

> +		.pm = &tegra_hte_pm,

> +		.of_match_table = tegra_hte_of_match,

> +	},

> +};

> +

> +module_platform_driver(tegra_hte_driver);

> +

> +MODULE_AUTHOR("Dipen Patel <dipenp@nvidia.com>");

> +MODULE_DESCRIPTION("NVIDIA Tegra HTE (Hardware Timestamping Engine) driver");

> +MODULE_LICENSE("GPL v2");
Michał Mirosław July 8, 2021, 11:33 p.m. UTC | #3
On Fri, Jun 25, 2021 at 04:55:24PM -0700, Dipen Patel wrote:
> Tegra194 device has multiple HTE instances also known as GTE

> (Generic hardware Timestamping Engine) which can timestamp subset of

> SoC lines/signals. This provider driver focuses on IRQ and GPIO lines

> and exposes timestamping ability on those lines to the consumers

> through HTE subsystem.

[...]
> +	ret = of_property_read_u32(dev->of_node, "slices", &slices);

> +	if (ret != 0) {

> +		dev_err(dev, "Could not read slices\n");

> +		return -EINVAL;

> +	}

> +

> +	hte_dev->sl = devm_kzalloc(dev, sizeof(struct hte_slices) * slices,

> +				   GFP_KERNEL);


Nit: There is devm_kcalloc() that will check for overflow in the
multiply in case @slices from DT is broken.
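
i.e. something like:

	hte_dev->sl = devm_kcalloc(dev, slices, sizeof(*hte_dev->sl),
				   GFP_KERNEL);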

Best Regards
Michał Mirosław
Dipen Patel July 28, 2021, 11:59 p.m. UTC | #4
Thanks Kent for the review comment. My responses inline.

On 7/1/21 7:21 AM, Kent Gibson wrote:
> On Fri, Jun 25, 2021 at 04:55:24PM -0700, Dipen Patel wrote:
>> Tegra194 device has multiple HTE instances also known as GTE
>> (Generic hardware Timestamping Engine) which can timestamp subset of
>> SoC lines/signals. This provider driver focuses on IRQ and GPIO lines
>> and exposes timestamping ability on those lines to the consumers
>> through HTE subsystem.
>>
>> Also, with this patch, added:
>> - documentation about this provider and its capabilities at
>> Documentation/hte.
>> - Compilation support in Makefile and Kconfig
>>
>> Signed-off-by: Dipen Patel <dipenp@nvidia.com>
>> ---
>>  Documentation/hte/index.rst        |  21 ++
>>  Documentation/hte/tegra194-hte.rst |  65 ++++
>>  Documentation/index.rst            |   1 +
>>  drivers/hte/Kconfig                |  12 +
>>  drivers/hte/Makefile               |   1 +
>>  drivers/hte/hte-tegra194.c         | 554 +++++++++++++++++++++++++++++
>>  6 files changed, 654 insertions(+)
>>  create mode 100644 Documentation/hte/index.rst
>>  create mode 100644 Documentation/hte/tegra194-hte.rst
>>  create mode 100644 drivers/hte/hte-tegra194.c
>>
>> diff --git a/Documentation/hte/index.rst b/Documentation/hte/index.rst
>> new file mode 100644
>> index 000000000000..f311ebec6b47
>> --- /dev/null
>> +++ b/Documentation/hte/index.rst
>> @@ -0,0 +1,21 @@
>> +.. SPDX-License-Identifier: GPL-2.0
>> +
>> +============================================
>> +The Linux Hardware Timestamping Engine (HTE)
>> +============================================
>> +
>> +The HTE Subsystem
>> +=================
>> +
>> +.. toctree::
>> +   :maxdepth: 1
>> +
>> +   hte
>> +
>> +HTE Tegra Provider
>> +==================
>> +
>> +.. toctree::
>> +   :maxdepth: 1
>> +
>> +   tegra194-hte
>> \ No newline at end of file
>> diff --git a/Documentation/hte/tegra194-hte.rst b/Documentation/hte/tegra194-hte.rst
>> new file mode 100644
>> index 000000000000..c23eaafcf080
>> --- /dev/null
>> +++ b/Documentation/hte/tegra194-hte.rst
>> @@ -0,0 +1,65 @@
>> +HTE Kernel provider driver
>> +==========================
>> +
>> +Description
>> +-----------
>> +The Nvidia tegra194 chip has many hardware timestamping engine (HTE) instances
>> +known as generic timestamping engine (GTE). This provider driver implements
>> +two GTE instances 1) GPIO GTE and 2) IRQ GTE. The both GTEs instances get the
>> +timestamp from the system counter TSC which has 31.25MHz clock rate, and the
>> +driver converts clock tick rate to nano seconds before storing it as timestamp
>> +value.
>> +
>> +GPIO GTE
>> +--------
>> +
>> +This GTE instance help timestamps GPIO in real time, for that to happen GPIO
>> +needs to be configured as input and IRQ needs to ba enabled as well. The only
>> +always on (AON) gpio controller instance supports timestamping GPIOs in
>> +realtime and it has 39 GPIO lines. There is also a dependency on AON GPIO
>> +controller as it requires very specific bits to be set in GPIO config register.
>> +It in a way creates cyclic dependency between GTE and GPIO controller. The GTE
>> +GPIO functionality is accessed from the GPIOLIB. It can support both the in
>> +kernel and userspace consumers. In the later case, requests go through GPIOLIB
>> +CDEV framework. The below APIs are added in GPIOLIB framework to access HTE
>> +subsystem and GPIO GTE for in kernel consumers.
>> +
>> +.. c:function:: int gpiod_hw_timestamp_control( struct gpio_desc *desc, bool enable )
>> +
>> +	To enable HTE on given GPIO line.
>> +
>> +.. c:function:: u64 gpiod_get_hw_timestamp( struct gpio_desc *desc, bool block )
>> +
>> +	To retrieve hardwre timestamp in nano seconds.
>> +
>> +.. c:function:: bool gpiod_is_hw_timestamp_enabled( const struct gpio_desc *desc )
>> +
>> +	To query if HTE is enabled on the given GPIO.
>> +
>> +There is hte-tegra194-gpio-test.c, located in ``drivers/hte/`` directory, test
>> +driver which demonstrates above APIs for the Jetson AGX platform. For userspace
>> +consumers, GPIO_V2_LINE_FLAG_EVENT_CLOCK_HARDWARE flag must be specifed during
>> +IOCTL calls, refer ``tools/gpio/gpio-event-mon.c``, which returns the timestamp
>> +in nano second.
>> +
> <snip>
>
>> +
>> +static void tegra_hte_read_fifo(struct tegra_hte_soc *gs)
>> +{
>> +	u32 tsh, tsl, src, pv, cv, acv, slice, bit_index, line_id;
>> +	u64 tsc;
>> +	int dir;
>> +	struct hte_ts_data el;
>> +
>> +	while ((tegra_hte_readl(gs, HTE_TESTATUS) >>
>> +		HTE_TESTATUS_OCCUPANCY_SHIFT) &
>> +		HTE_TESTATUS_OCCUPANCY_MASK) {
>> +		tsh = tegra_hte_readl(gs, HTE_TETSCH);
>> +		tsl = tegra_hte_readl(gs, HTE_TETSCL);
>> +		tsc = (((u64)tsh << 32) | tsl);
>> +
>> +		src = tegra_hte_readl(gs, HTE_TESRC);
>> +		slice = (src >> HTE_TESRC_SLICE_SHIFT) &
>> +			    HTE_TESRC_SLICE_DEFAULT_MASK;
>> +
>> +		pv = tegra_hte_readl(gs, HTE_TEPCV);
>> +		cv = tegra_hte_readl(gs, HTE_TECCV);
>> +		acv = pv ^ cv;
>> +		while (acv) {
>> +			bit_index = __builtin_ctz(acv);
>> +			if ((pv >> bit_index) & BIT(0))
>> +				dir = HTE_EVENT_RISING_EDGE;
>> +			else
>> +				dir = HTE_EVENT_FALLING_EDGE;
>> +
>> +			line_id = bit_index + (slice << 5);
>> +			el.dir = dir;
>> +			el.tsc = tsc << HTE_TS_NS_SHIFT;
>> +			hte_push_ts_ns_atomic(gs->chip, line_id, &el,
>> +					      sizeof(el));
>> +			acv &= ~BIT(bit_index);
>> +		}
>> +		tegra_hte_writel(gs, HTE_TECMD, HTE_TECMD_CMD_POP);
>> +	}
>> +}
> What happens when the hte_push_ts_ns_atomic() fails?
> The timestamp will be quietly dropped?
> What happens when the interrupt corresponding to that dropped timestamp
> asks for it?  The irq handler thread will block until it can get a
> timestamp from the subsequent interrupt?

Two things happen: 1) at the push, the HTE core increments a sequence
counter; 2) if the consumer has provided a callback, it calls that callback
with either HTE_TS_DROPPED or HTE_TS_AVAIL. The sequence counter gives an
indirect view of dropped timestamps. However, I see the problem with
consumers that do not provide a callback: in that case the push_ts* API just
wakes up the process without indicating why (assuming the notify variable is
true, or else there is a chance for the thread to block forever). One easy
approach I can think of for now is to make the callback mandatory (it is
optional right now). I will have to rethink that scenario and will push a
corrected version in the next RFC.

Thanks for pointing out.

>
> Which brings me back to the concern I have with the approach used in
> the hte/gpiolib integration - how do you guarantee that the timestamp
> returned by gpiod_get_hw_timestamp() corresponds to the irq interrupt
> being handled, particularly in the face of errors such as:
>  - overflows of the timestamp FIFO in the chip

I currently do not have any indication mechanism, as the providers I am
dealing with right now do not have hardware overflow detection support. If
the chip supports it, it should be easy to integrate that feature. I will
provide some hook function, or a change in the push_* API, to accommodate
this in the next version of the RFC.

>  - overflows of software FIFOs as here

The HTE core records the sequence counter and also calls the consumer back
with HTE_TS_DROPPED.

>  - lost interupts (if the hw generates interrupts faster than the CPU
>    can service them)

For this, I have no idea unless the hardware supports some sort of
mechanism to catch it. For the current providers, as soon as a change is
detected on a line, the timestamp is captured in the hardware FIFO. The
FIFO's interrupt is generated based on a threshold set on that FIFO, and it
is separate from the interrupt of the actual device line; that is why I said
I have no idea how we can tackle this. Let me know if there is any idea or
reference code that does tackle it.


Regarding the HTE/GPIOLIB integration comment:

You are right; currently I only return the tsc field from struct hte_ts_data
to gpiolib. I could extend that to return the hte_ts_data structure, which
has the sequence counter, and which I believe can be used to track the
overflow situation. The dropped scenario can be easily tracked if gpiolib
can be notified of the above-mentioned DROPPED event through a callback. If
that is the case, is it ok to have some sort of per-GPIO callback in
gpiolib?
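
To make the question concrete, I am thinking of something like (purely
illustrative, all names made up):

	int gpiod_set_hw_ts_callback(struct gpio_desc *desc,
				     void (*cb)(struct gpio_desc *desc,
						int event, void *data),
				     void *data);

where event would be HTE_TS_AVAIL or HTE_TS_DROPPED, so gpiolib (and the
cdev code on top of it) could be told about per-line drops.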


Any idea how I can integrate callback notification with gpiolib if you do
not agree with the above callback suggestion?

> ?
>
> Cheers,
> Kent.
>
Dipen Patel July 29, 2021, 2:43 a.m. UTC | #5
Thanks for the suggestion, I will change it in the next RFC series.

On 7/8/21 4:33 PM, Michał Mirosław wrote:
> On Fri, Jun 25, 2021 at 04:55:24PM -0700, Dipen Patel wrote:
>> Tegra194 device has multiple HTE instances also known as GTE
>> (Generic hardware Timestamping Engine) which can timestamp subset of
>> SoC lines/signals. This provider driver focuses on IRQ and GPIO lines
>> and exposes timestamping ability on those lines to the consumers
>> through HTE subsystem.
> [...]
>> +	ret = of_property_read_u32(dev->of_node, "slices", &slices);
>> +	if (ret != 0) {
>> +		dev_err(dev, "Could not read slices\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	hte_dev->sl = devm_kzalloc(dev, sizeof(struct hte_slices) * slices,
>> +				   GFP_KERNEL);
> Nit: There is devm_kcalloc() that will check for overflow in the
> multiply in case @slices from DT is broken.
>
> Best Regards
> Michał Mirosław
Dipen Patel July 30, 2021, 7:01 a.m. UTC | #6
On 7/28/21 4:59 PM, Dipen Patel wrote:
> Thanks Kent for the review comment. My responses inline.
>
> On 7/1/21 7:21 AM, Kent Gibson wrote:
>> On Fri, Jun 25, 2021 at 04:55:24PM -0700, Dipen Patel wrote:
>>> Tegra194 device has multiple HTE instances also known as GTE
>>> (Generic hardware Timestamping Engine) which can timestamp subset of
>>> SoC lines/signals. This provider driver focuses on IRQ and GPIO lines
>>> and exposes timestamping ability on those lines to the consumers
>>> through HTE subsystem.
>>>
>>> Also, with this patch, added:
>>> - documentation about this provider and its capabilities at
>>> Documentation/hte.
>>> - Compilation support in Makefile and Kconfig
>>>
>>> Signed-off-by: Dipen Patel <dipenp@nvidia.com>
>>> ---
>>>  Documentation/hte/index.rst        |  21 ++
>>>  Documentation/hte/tegra194-hte.rst |  65 ++++
>>>  Documentation/index.rst            |   1 +
>>>  drivers/hte/Kconfig                |  12 +
>>>  drivers/hte/Makefile               |   1 +
>>>  drivers/hte/hte-tegra194.c         | 554 +++++++++++++++++++++++++++++
>>>  6 files changed, 654 insertions(+)
>>>  create mode 100644 Documentation/hte/index.rst
>>>  create mode 100644 Documentation/hte/tegra194-hte.rst
>>>  create mode 100644 drivers/hte/hte-tegra194.c
>>>
>>> diff --git a/Documentation/hte/index.rst b/Documentation/hte/index.rst
>>> new file mode 100644
>>> index 000000000000..f311ebec6b47
>>> --- /dev/null
>>> +++ b/Documentation/hte/index.rst
>>> @@ -0,0 +1,21 @@
>>> +.. SPDX-License-Identifier: GPL-2.0
>>> +
>>> +============================================
>>> +The Linux Hardware Timestamping Engine (HTE)
>>> +============================================
>>> +
>>> +The HTE Subsystem
>>> +=================
>>> +
>>> +.. toctree::
>>> +   :maxdepth: 1
>>> +
>>> +   hte
>>> +
>>> +HTE Tegra Provider
>>> +==================
>>> +
>>> +.. toctree::
>>> +   :maxdepth: 1
>>> +
>>> +   tegra194-hte
>>> \ No newline at end of file
>>> diff --git a/Documentation/hte/tegra194-hte.rst b/Documentation/hte/tegra194-hte.rst
>>> new file mode 100644
>>> index 000000000000..c23eaafcf080
>>> --- /dev/null
>>> +++ b/Documentation/hte/tegra194-hte.rst
>>> @@ -0,0 +1,65 @@
>>> +HTE Kernel provider driver
>>> +==========================
>>> +
>>> +Description
>>> +-----------
>>> +The Nvidia tegra194 chip has many hardware timestamping engine (HTE) instances
>>> +known as generic timestamping engine (GTE). This provider driver implements
>>> +two GTE instances 1) GPIO GTE and 2) IRQ GTE. The both GTEs instances get the
>>> +timestamp from the system counter TSC which has 31.25MHz clock rate, and the
>>> +driver converts clock tick rate to nano seconds before storing it as timestamp
>>> +value.
>>> +
>>> +GPIO GTE
>>> +--------
>>> +
>>> +This GTE instance help timestamps GPIO in real time, for that to happen GPIO
>>> +needs to be configured as input and IRQ needs to ba enabled as well. The only
>>> +always on (AON) gpio controller instance supports timestamping GPIOs in
>>> +realtime and it has 39 GPIO lines. There is also a dependency on AON GPIO
>>> +controller as it requires very specific bits to be set in GPIO config register.
>>> +It in a way creates cyclic dependency between GTE and GPIO controller. The GTE
>>> +GPIO functionality is accessed from the GPIOLIB. It can support both the in
>>> +kernel and userspace consumers. In the later case, requests go through GPIOLIB
>>> +CDEV framework. The below APIs are added in GPIOLIB framework to access HTE
>>> +subsystem and GPIO GTE for in kernel consumers.
>>> +
>>> +.. c:function:: int gpiod_hw_timestamp_control( struct gpio_desc *desc, bool enable )
>>> +
>>> +	To enable HTE on given GPIO line.
>>> +
>>> +.. c:function:: u64 gpiod_get_hw_timestamp( struct gpio_desc *desc, bool block )
>>> +
>>> +	To retrieve hardwre timestamp in nano seconds.
>>> +
>>> +.. c:function:: bool gpiod_is_hw_timestamp_enabled( const struct gpio_desc *desc )
>>> +
>>> +	To query if HTE is enabled on the given GPIO.
>>> +
>>> +There is hte-tegra194-gpio-test.c, located in ``drivers/hte/`` directory, test
>>> +driver which demonstrates above APIs for the Jetson AGX platform. For userspace
>>> +consumers, GPIO_V2_LINE_FLAG_EVENT_CLOCK_HARDWARE flag must be specifed during
>>> +IOCTL calls, refer ``tools/gpio/gpio-event-mon.c``, which returns the timestamp
>>> +in nano second.
>>> +
>> <snip>
>>
>>> +
>>> +static void tegra_hte_read_fifo(struct tegra_hte_soc *gs)
>>> +{
>>> +	u32 tsh, tsl, src, pv, cv, acv, slice, bit_index, line_id;
>>> +	u64 tsc;
>>> +	int dir;
>>> +	struct hte_ts_data el;
>>> +
>>> +	while ((tegra_hte_readl(gs, HTE_TESTATUS) >>
>>> +		HTE_TESTATUS_OCCUPANCY_SHIFT) &
>>> +		HTE_TESTATUS_OCCUPANCY_MASK) {
>>> +		tsh = tegra_hte_readl(gs, HTE_TETSCH);
>>> +		tsl = tegra_hte_readl(gs, HTE_TETSCL);
>>> +		tsc = (((u64)tsh << 32) | tsl);
>>> +
>>> +		src = tegra_hte_readl(gs, HTE_TESRC);
>>> +		slice = (src >> HTE_TESRC_SLICE_SHIFT) &
>>> +			    HTE_TESRC_SLICE_DEFAULT_MASK;
>>> +
>>> +		pv = tegra_hte_readl(gs, HTE_TEPCV);
>>> +		cv = tegra_hte_readl(gs, HTE_TECCV);
>>> +		acv = pv ^ cv;
>>> +		while (acv) {
>>> +			bit_index = __builtin_ctz(acv);
>>> +			if ((pv >> bit_index) & BIT(0))
>>> +				dir = HTE_EVENT_RISING_EDGE;
>>> +			else
>>> +				dir = HTE_EVENT_FALLING_EDGE;
>>> +
>>> +			line_id = bit_index + (slice << 5);
>>> +			el.dir = dir;
>>> +			el.tsc = tsc << HTE_TS_NS_SHIFT;
>>> +			hte_push_ts_ns_atomic(gs->chip, line_id, &el,
>>> +					      sizeof(el));
>>> +			acv &= ~BIT(bit_index);
>>> +		}
>>> +		tegra_hte_writel(gs, HTE_TECMD, HTE_TECMD_CMD_POP);
>>> +	}
>>> +}
>> What happens when the hte_push_ts_ns_atomic() fails?
>> The timestamp will be quietly dropped?
>> What happens when the interrupt corresponding to that dropped timestamp
>> asks for it?  The irq handler thread will block until it can get a
>> timestamp from the subsequent interrupt?
> Two things happen, 1) at the push, HTE core increments seq counter
>
> 2) If the consumer has provided callback, it will either call that callback
>
> with HTE_TS_DROPPED or HTE_TS_AVAIL. The seq counter gives indirect
>
> view of dropped ts. However, I see the problem with the consumers not
>
> providing callback, in that case, push_ts* API just wakes up process without
>
> indicating why (assuming notify variable is true or else there is a chance for
>
> the thread to block forever). One easy approach I can think of for now is to
>
> make callback mandatory (which is optional right now), I will have to rethink
>
> that scenario and will push corrected version next RFC version.
>
> Thanks for pointing out.
>
>> Which brings me back to the concern I have with the approach used in
>> the hte/gpiolib integration - how do you guarantee that the timestamp
>> returned by gpiod_get_hw_timestamp() corresponds to the irq interrupt
>> being handled, particularly in the face of errors such as:
>>  - overflows of the timestamp FIFO in the chip
> I currently do not have any indication mechanism as the providers
>
> I am dealing with right now does not have overflow hardware detection
>
> support. If the chip supports, it should be easy to integrate that feature.
>
> I will provide some hook function or change in push_* API to accommodate
>
> this in next version of RFC.
>
>>  - overflows of software FIFOs as here
> HTE core records sequence counter as well it callsback the consumer with
>
> HTE_TS_DROPPED.
>
>>  - lost interupts (if the hw generates interrupts faster than the CPU
>>    can service them)
> For this, I have no idea unless hardware supports some sort of mechanism
>
> to catch that. For the current providers, as soon as it detects changes on lines
>
> it captures TS in its hw fifo. Its interrupt gets generated based on threshold
>
> set in that hw fifo. This interrupt is different than the lines of actual device
>
> that is why I said I have no idea how we can tackle that. Let me know if there
>
> is any idea or reference of the codes which does tackle this.
>
>
> Regarding HTE/GPIOLIB integration comment:
>
> You are right, currently, I have only tsc field returned from struct hte_ts_data
>
> to gpiolib. If I can extend that to return hte_ts_data structure which has seq
>
> counter, which I believe can be used to track the overflow situation. The

The reason I only return the timestamp and not other details like its
sequence counter is to comply with line_event_timestamp(), since it returns
only a u64. I am not sure of the best way to extend this and expose the
sequence counter.

>
> dropped scenario can be easily tracked if gpiolib can be notified with above
>
> mentioned DROP event through callback. If that is the case, is it ok to have
>
> some sort of callback per gpio in gpiolib?
>
>
> Any idea how I can integrate callback notification with gpiolib if you do not agree on
>
> above callback suggestion?
>
>> ?
>>
>> Cheers,
>> Kent.
>>
Kent Gibson July 31, 2021, 3:43 p.m. UTC | #7
On Wed, Jul 28, 2021 at 04:59:08PM -0700, Dipen Patel wrote:
> Thanks Kent for the review comment. My responses inline.
> 
> On 7/1/21 7:21 AM, Kent Gibson wrote:
> > On Fri, Jun 25, 2021 at 04:55:24PM -0700, Dipen Patel wrote:
> >> Tegra194 device has multiple HTE instances also known as GTE
> >> (Generic hardware Timestamping Engine) which can timestamp subset of
> >> SoC lines/signals. This provider driver focuses on IRQ and GPIO lines
> >> and exposes timestamping ability on those lines to the consumers
> >> through HTE subsystem.
> >>
> >> Also, with this patch, added:
> >> - documentation about this provider and its capabilities at
> >> Documentation/hte.
> >> - Compilation support in Makefile and Kconfig
> >>
> >> Signed-off-by: Dipen Patel <dipenp@nvidia.com>
> >> ---
> >>  Documentation/hte/index.rst        |  21 ++
> >>  Documentation/hte/tegra194-hte.rst |  65 ++++
> >>  Documentation/index.rst            |   1 +
> >>  drivers/hte/Kconfig                |  12 +
> >>  drivers/hte/Makefile               |   1 +
> >>  drivers/hte/hte-tegra194.c         | 554 +++++++++++++++++++++++++++++
> >>  6 files changed, 654 insertions(+)
> >>  create mode 100644 Documentation/hte/index.rst
> >>  create mode 100644 Documentation/hte/tegra194-hte.rst
> >>  create mode 100644 drivers/hte/hte-tegra194.c
> >>
> >> diff --git a/Documentation/hte/index.rst b/Documentation/hte/index.rst
> >> new file mode 100644
> >> index 000000000000..f311ebec6b47
> >> --- /dev/null
> >> +++ b/Documentation/hte/index.rst
> >> @@ -0,0 +1,21 @@
> >> +.. SPDX-License-Identifier: GPL-2.0
> >> +
> >> +============================================
> >> +The Linux Hardware Timestamping Engine (HTE)
> >> +============================================
> >> +
> >> +The HTE Subsystem
> >> +=================
> >> +
> >> +.. toctree::
> >> +   :maxdepth: 1
> >> +
> >> +   hte
> >> +
> >> +HTE Tegra Provider
> >> +==================
> >> +
> >> +.. toctree::
> >> +   :maxdepth: 1
> >> +
> >> +   tegra194-hte
> >> \ No newline at end of file
> >> diff --git a/Documentation/hte/tegra194-hte.rst b/Documentation/hte/tegra194-hte.rst
> >> new file mode 100644
> >> index 000000000000..c23eaafcf080
> >> --- /dev/null
> >> +++ b/Documentation/hte/tegra194-hte.rst
> >> @@ -0,0 +1,65 @@
> >> +HTE Kernel provider driver
> >> +==========================
> >> +
> >> +Description
> >> +-----------
> >> +The Nvidia tegra194 chip has many hardware timestamping engine (HTE) instances
> >> +known as generic timestamping engine (GTE). This provider driver implements
> >> +two GTE instances 1) GPIO GTE and 2) IRQ GTE. The both GTEs instances get the
> >> +timestamp from the system counter TSC which has 31.25MHz clock rate, and the
> >> +driver converts clock tick rate to nano seconds before storing it as timestamp
> >> +value.
> >> +
> >> +GPIO GTE
> >> +--------
> >> +
> >> +This GTE instance help timestamps GPIO in real time, for that to happen GPIO
> >> +needs to be configured as input and IRQ needs to ba enabled as well. The only
> >> +always on (AON) gpio controller instance supports timestamping GPIOs in
> >> +realtime and it has 39 GPIO lines. There is also a dependency on AON GPIO
> >> +controller as it requires very specific bits to be set in GPIO config register.
> >> +It in a way creates cyclic dependency between GTE and GPIO controller. The GTE
> >> +GPIO functionality is accessed from the GPIOLIB. It can support both the in
> >> +kernel and userspace consumers. In the later case, requests go through GPIOLIB
> >> +CDEV framework. The below APIs are added in GPIOLIB framework to access HTE
> >> +subsystem and GPIO GTE for in kernel consumers.
> >> +
> >> +.. c:function:: int gpiod_hw_timestamp_control( struct gpio_desc *desc, bool enable )
> >> +
> >> +	To enable HTE on given GPIO line.
> >> +
> >> +.. c:function:: u64 gpiod_get_hw_timestamp( struct gpio_desc *desc, bool block )
> >> +
> >> +	To retrieve hardwre timestamp in nano seconds.
> >> +
> >> +.. c:function:: bool gpiod_is_hw_timestamp_enabled( const struct gpio_desc *desc )
> >> +
> >> +	To query if HTE is enabled on the given GPIO.
> >> +
> >> +There is hte-tegra194-gpio-test.c, located in ``drivers/hte/`` directory, test
> >> +driver which demonstrates above APIs for the Jetson AGX platform. For userspace
> >> +consumers, GPIO_V2_LINE_FLAG_EVENT_CLOCK_HARDWARE flag must be specifed during
> >> +IOCTL calls, refer ``tools/gpio/gpio-event-mon.c``, which returns the timestamp
> >> +in nano second.
> >> +
> > <snip>
> >
> >> +
> >> +static void tegra_hte_read_fifo(struct tegra_hte_soc *gs)
> >> +{
> >> +	u32 tsh, tsl, src, pv, cv, acv, slice, bit_index, line_id;
> >> +	u64 tsc;
> >> +	int dir;
> >> +	struct hte_ts_data el;
> >> +
> >> +	while ((tegra_hte_readl(gs, HTE_TESTATUS) >>
> >> +		HTE_TESTATUS_OCCUPANCY_SHIFT) &
> >> +		HTE_TESTATUS_OCCUPANCY_MASK) {
> >> +		tsh = tegra_hte_readl(gs, HTE_TETSCH);
> >> +		tsl = tegra_hte_readl(gs, HTE_TETSCL);
> >> +		tsc = (((u64)tsh << 32) | tsl);
> >> +
> >> +		src = tegra_hte_readl(gs, HTE_TESRC);
> >> +		slice = (src >> HTE_TESRC_SLICE_SHIFT) &
> >> +			    HTE_TESRC_SLICE_DEFAULT_MASK;
> >> +
> >> +		pv = tegra_hte_readl(gs, HTE_TEPCV);
> >> +		cv = tegra_hte_readl(gs, HTE_TECCV);
> >> +		acv = pv ^ cv;
> >> +		while (acv) {
> >> +			bit_index = __builtin_ctz(acv);
> >> +			if ((pv >> bit_index) & BIT(0))
> >> +				dir = HTE_EVENT_RISING_EDGE;
> >> +			else
> >> +				dir = HTE_EVENT_FALLING_EDGE;
> >> +
> >> +			line_id = bit_index + (slice << 5);
> >> +			el.dir = dir;
> >> +			el.tsc = tsc << HTE_TS_NS_SHIFT;
> >> +			hte_push_ts_ns_atomic(gs->chip, line_id, &el,
> >> +					      sizeof(el));
> >> +			acv &= ~BIT(bit_index);
> >> +		}
> >> +		tegra_hte_writel(gs, HTE_TECMD, HTE_TECMD_CMD_POP);
> >> +	}
> >> +}
> > What happens when the hte_push_ts_ns_atomic() fails?
> > The timestamp will be quietly dropped?
> > What happens when the interrupt corresponding to that dropped timestamp
> > asks for it?  The irq handler thread will block until it can get a
> > timestamp from the subsequent interrupt?
> 
> Two things happen, 1) at the push, HTE core increments seq counter
> 
> 2) If the consumer has provided callback, it will either call that callback
> with HTE_TS_DROPPED or HTE_TS_AVAIL. The seq counter gives indirect
> view of dropped ts. However, I see the problem with the consumers not
> providing callback, in that case, push_ts* API just wakes up process without
> indicating why (assuming notify variable is true or else there is a chance for
> the thread to block forever). One easy approach I can think of for now is to
> make callback mandatory (which is optional right now), I will have to rethink
> that scenario and will push corrected version next RFC version.
> 
> Thanks for pointing out.
> 

I'm not sure you understood my question, which was intended to
demonstrate how an overflow here would break your gpio integration, but I
am certain that I don't understand your answer.

Using the callback to signal fifo overflow to the consumer is crazy.
If the consumer is too busy to service the fifo then they probably won't
be prepared to deal with the callback either. And the primary purpose of
the fifo is to decouple the producer and consumer, so requiring a callback
defeats the whole purpose of having the fifo there in the first place.

> >
> > Which brings me back to the concern I have with the approach used in
> > the hte/gpiolib integration - how do you guarantee that the timestamp
> > returned by gpiod_get_hw_timestamp() corresponds to the irq interrupt
> > being handled, particularly in the face of errors such as:
> >  - overflows of the timestamp FIFO in the chip
> 
> I currently do not have any indication mechanism as the providers
> I am dealing with right now does not have overflow hardware detection
> support. If the chip supports, it should be easy to integrate that feature.
> I will provide some hook function or change in push_* API to accommodate
> this in next version of RFC.
> 
> >  - overflows of software FIFOs as here
> 
> HTE core records sequence counter as well it callsback the consumer with
> HTE_TS_DROPPED.
> 
> >  - lost interupts (if the hw generates interrupts faster than the CPU
> >    can service them)
> 
> For this, I have no idea unless hardware supports some sort of mechanism
> to catch that. For the current providers, as soon as it detects changes on lines
> it captures TS in its hw fifo. Its interrupt gets generated based on threshold
> set in that hw fifo. This interrupt is different than the lines of actual device
> that is why I said I have no idea how we can tackle that. Let me know if there
> is any idea or reference of the codes which does tackle this.
> 

As far as I am aware there is no solution, given your suggested
architecture.

Your architecture is inherently fragile, as you try to use one stream
of data (the timestamp fifo) to provide supplementary info for another
(the physical irq).  Guaranteeing that the two are synchronised is
impossible - even if you can get them synced at some point, they can
fall out of sync without any indication.
That is a recipe for Ingenuity flight 6.

My solution would be to use the hte timestamp fifo as the event source,
rather than the physical irq.  With only one event source the 
synchronisation problem disappears.  As to how to implement that,
gpiolib-cdev would request a line from the hte subsystem and register
an event handler for it, much as it does currently with the irq
subsystem. That event handler would translate the hte events into gpio
events.
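
To make the shape of that concrete, something along these lines - every name
below is hypothetical, only the hte_ts_data fields (tsc, dir, seq) come from
this series:

/*
 * Illustrative only - none of these names exist in the posted series.
 * The idea: gpiolib-cdev asks hte for a timestamped line and registers
 * a handler, instead of calling request_threaded_irq() for that line.
 */
#include <linux/types.h>

struct hte_chip;                /* whatever provider handle hte exposes */
struct hte_ts_data;             /* tsc (ns), dir, seq - as discussed above */

typedef void (*hte_ts_cb_t)(const struct hte_ts_data *ts, void *data);

/* hypothetical request API */
int hte_request_ts(struct hte_chip *chip, u32 line_id,
                   hte_ts_cb_t handler, void *data);

/* handler gpiolib-cdev would register for a hardware-timestamped line */
static void edge_hte_handler(const struct hte_ts_data *ts, void *data)
{
        /*
         * 'data' is the cdev line context.  Translate the hte event into
         * a gpio_v2_line_event - ts->tsc is already in nanoseconds and
         * ts->dir gives the edge - then queue it just as the irq path does.
         */
}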

You still have to deal with possible fifo overflows, but if the fifo
overflows result in discarding the oldest event, rather than the most
recent, then everything comes out in the wash.  If not then the final
event in a burst may not correspond to the actual state so you need
some additional mechanism to address that.
Either way the consumer needs to be aware that events may be lost - but
with the event seqno for consumers to detect those lost events we
already have that covered.

> 
> Regarding HTE/GPIOLIB integration comment:
> 
> You are right, currently, I have only tsc field returned from struct hte_ts_data
> to gpiolib. If I can extend that to return hte_ts_data structure which has seq
> counter, which I believe can be used to track the overflow situation. The
> dropped scenario can be easily tracked if gpiolib can be notified with above
> mentioned DROP event through callback. If that is the case, is it ok to have
> some sort of callback per gpio in gpiolib?
> 

Even better if you can provide the whole struct hte_ts_data so we have
the direction as well (assuming all hte providers provide direction?).
Otherwise gpiolib-cdev may need to read the physical line state and that
may have changed since the hardware captured the event.
In the solution I outlined above, the hte_ts_data would be provided to
the event handler registered by gpiolib-cdev.
And in this case you could skip buffering the event in hte - it could be
passed to the event handler as soon as it is read from the hardware - 
gpiolib-cdev does its own buffering of gpio events.

> 
> Any idea how I can integrate callback notification with gpiolib if you do not agree on
> above callback suggestion?
> 

See above.  But this is just my take, so I would get feedback from the
relevant maintainers or SMEs before you act on anything suggested above.

Cheers,
Kent.
Kent Gibson Aug. 3, 2021, 11:02 p.m. UTC | #8
On Tue, Aug 03, 2021 at 03:40:50PM -0700, Dipen Patel wrote:
> 
> On 7/31/21 8:43 AM, Kent Gibson wrote:
> > On Wed, Jul 28, 2021 at 04:59:08PM -0700, Dipen Patel wrote:
> >> Thanks Kent for the review comment. My responses inline.
> >>
> >> On 7/1/21 7:21 AM, Kent Gibson wrote:
> >>> On Fri, Jun 25, 2021 at 04:55:24PM -0700, Dipen Patel wrote:
> >>>> <snip>
> >
> > My solution would be to use the hte timestamp fifo as the event source,
> > rather than the physical irq.  With only one event source the 
> > synchronisation problem disappears.  As to how to implement that,
> > gpiolib-cdev would request a line from the hte subsystem and register
> > and event handler for it, much as it does currently with the irq
> > subsystem. 
> Regarding the "much as it does currently with the irq subsystem"
> statement, do you mean edge_irq_handler?

I mean that style of API.  Obviously it would be a new handler function.
But it would perform the same as edge_irq_handler and edge_irq_thread,
just with a different event source.

> > That event handler would translate the hte events into gpio
> > events.
> >
> > You still have to deal with possible fifo overflows, but if the fifo
> > overflows result in discarding the oldest event, rather than the most
> > recent, then everything comes out in the wash.  If not then the final
> > event in a burst may not correspond to the actual state so you need
> > some additional mechanism to address that.
> > Either way the consumer needs to be aware that events may be lost - but
> > with the event seqno for consumers to detect those lost events we
> > already have that covered.
> >
> >> Regarding HTE/GPIOLIB integration comment:
> >>
> >> You are right, currently, I have only tsc field returned from struct hte_ts_data
> >> to gpiolib. If I can extend that to return hte_ts_data structure which has seq
> >> counter, which I believe can be used to track the overflow situation. The
> >> dropped scenario can be easily tracked if gpiolib can be notified with above
> >> mentioned DROP event through callback. If that is the case, is it ok to have
> >> some sort of callback per gpio in gpiolib?
> >>
> > Even better if you can provide the whole struct hte_ts_data so we have
> > the direction as well (assuming all hte providers provide direction?).
> > Otherwise gpiolib-cdev may need to read the physical line state and that
> > may have changed since the hardware captured the event.
> > In the solution I outlined above, the hte_ts_data would be provided to
> > the event handler registered by gpiolib-cdev.
> 
> How is this event handler different than cdev providing a callback to
> the hte core? I am guessing even if cdev registers an event handler with HTE,
> it is some sort of function pointer, just as callbacks are.
> 

If you mean your proposed callbacks, well for starters it wouldn't pass
it a DROPPED event.

But other than that, registering a handler is essentially a callback.
Your existing callback is at interrupt context, right?
The irq subsystem also has provision for handling the event at
interrupt context or thread context - gpiolib-cdev uses both.
You might want to do the same here - depends on what your expected
consumers would prefer.
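
For comparison, the two-stage arrangement gpiolib-cdev already gets from the
irq subsystem, and a possible hte mirror of it - request_threaded_irq() and
the edge_irq_* handlers are the real ones referred to above, while the hte_*
names are invented here purely for illustration:

/*
 * What gpiolib-cdev gets from the irq subsystem today (simplified,
 * flag handling and error paths omitted): edge_irq_handler runs in
 * interrupt context, edge_irq_thread finishes the work in thread context.
 */
ret = request_threaded_irq(irq, edge_irq_handler, edge_irq_thread,
                           IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING |
                           IRQF_ONESHOT,
                           line->req->label, line);

/*
 * A hypothetical hte request with the same two-stage shape (extending
 * the single-handler sketch earlier in the thread; nothing here is
 * proposed in the series):
 */
ret = hte_request_ts(chip, line_id,
                     edge_hte_handler,      /* interrupt context */
                     edge_hte_thread,       /* thread context */
                     line);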

Way back when you initially proposed this I said "this is an irq problem",
meaning that it makes sense to me that this should be integrated with irq,
and provide functions to return additional detail to the irq handlers,
such as the event timestamp.
Not sure what the irq guys think of that - it may be simpler and
clearer to provide a separate subsystem.
Either way, a hte subsystem that provides an irq-like API might be a
good way to start.

Cheers,
Kent.
Dipen Patel Aug. 7, 2021, 2:41 a.m. UTC | #9
On 7/31/21 8:43 AM, Kent Gibson wrote:
> On Wed, Jul 28, 2021 at 04:59:08PM -0700, Dipen Patel wrote:
>> Thanks Kent for the review comment. My responses inline.
>>
>> On 7/1/21 7:21 AM, Kent Gibson wrote:
>>> On Fri, Jun 25, 2021 at 04:55:24PM -0700, Dipen Patel wrote:

>>>> <snip>

> My solution would be to use the hte timestamp fifo as the event source,
> rather than the physical irq.  With only one event source the
> synchronisation problem disappears.  As to how to implement that,
> gpiolib-cdev would request a line from the hte subsystem and register
> and event handler for it, much as it does currently with the irq
> subsystem. That event handler would translate the hte events into gpio
> events.


I have to circle back to here regarding the event handler thing. I
surely did not understand the "fifo as event source rather than physical irq"
part. I believe you are suggesting to have some kind of buffer abstraction
layer for the hardware fifo, similar to what I have with the software buffer,
and to register a handler to that buffer, right?

The current implementation I have (not with gpiolib/HTE integration)
is partially similar to the event handler mechanism except that it does not
send data with it. See hte-tegra194-irq-test.c in this patch.

Coming back to the gpiolib/hte integration part and your suggestion about
providing an event handler during hte registration, I have the below doubts:

1. When HTE calls this provided hte_handler, will it store data into
hte->timestamp_ns directly? I am guessing yes.

2. Does the hte handler solution create a race between two handlers, i.e.
edge_irq_handler and hte_handler, for the worst case scenario as below?

2.a edge_irq_handler runs first, checks some kind of flag to see if
we are using the hardware clock and if yes, directly accesses timestamp_ns
instead of calling line_event_timestamp.

2.b It finds timestamp_ns to be invalid since it ran before the hte event
handler did.

2.c It returns and invokes edge_irq_thread.

2.c.1 Here, should edge_irq_thread wait in cdev for the hte handler to be
called? If yes, isn't there a case where it will wait forever for the hte
handler, while also keeping the irq line disabled since IRQF_ONESHOT is
specified - which could happen when the provider has gone rogue?

3. I am guessing there will not be dropped events in this suggestion since we
are directly sending data without buffering in HTE; that is the good part I
believe.


>

> You still have to deal with possible fifo overflows, but if the fifo
> overflows result in discarding the oldest event, rather than the most
> recent, then everything comes out in the wash.  If not then the final
> event in a burst may not correspond to the actual state so you need
> some additional mechanism to address that.
> Either way the consumer needs to be aware that events may be lost - but
> with the event seqno for consumers to detect those lost events we
> already have that covered.


Correct (for the seqno part); you already have seqno, so cdev does not need
struct hte_ts_data's u64 seq counter.

On a similar note, I was looking at the linereq_put_event
function and I have the below doubts:

1. IIUC, you are discarding the oldest data when the fifo is full, right?

2. There is no indication to the waiting client if overflow is true besides
the pr_debug print.

2.a Does this not block the waiting client infinitely, since there is no
wake_up_poll call in case of overflow == 1?

2.c If I have missed it, what current mechanism does cdev provide to the
client besides seqno to indicate there is a drop, and if there is a drop,
what does it do to re-sync?


>

> <snip>

>
> Cheers,
> Kent.
>
Kent Gibson Aug. 7, 2021, 3:07 a.m. UTC | #10
On Fri, Aug 06, 2021 at 07:41:09PM -0700, Dipen Patel wrote:
> 
> On 7/31/21 8:43 AM, Kent Gibson wrote:
> > On Wed, Jul 28, 2021 at 04:59:08PM -0700, Dipen Patel wrote:
> >> Thanks Kent for the review comment. My responses inline.
> >>
> >> On 7/1/21 7:21 AM, Kent Gibson wrote:
> >>> On Fri, Jun 25, 2021 at 04:55:24PM -0700, Dipen Patel wrote:
> >>>> <snip>
> >
> > My solution would be to use the hte timestamp fifo as the event source,
> > rather than the physical irq.  With only one event source the 
> > synchronisation problem disappears.  As to how to implement that,
> > gpiolib-cdev would request a line from the hte subsystem and register
> > and event handler for it, much as it does currently with the irq
> > subsystem. That event handler would translate the hte events into gpio
> > events.
> 
> I have to circle back to here regarding the event handler thing. I
> surely did not understand fifo as event source rather than physical irq
> part? I believe you are suggesting to have somekind of buffer abstraction
> layer for the hardware fifo similar to what I have with software buffer and
> register handler to that buffer, right?
> 

No, what is the purpose of that buffering and watermarking in software?
Just pass the timestamped edge event direct to the consumer.
Let the consumer do any buffering if necessary, as Jonathon Cameron
also suggested in the 02/11 thread.
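
In the tegra provider that would reduce the drain loop quoted earlier to
handing each event straight to the registered handler - a sketch only, with
gs->handler, gs->data and gs->seq as assumed fields that do not exist in the
posted driver:

/*
 * Sketch of the "no software buffering in hte" variant of the fifo
 * drain loop quoted earlier: instead of hte_push_ts_ns_atomic(), the
 * event is handed straight to whatever handler the consumer registered.
 */
el.dir = dir;
el.tsc = tsc << HTE_TS_NS_SHIFT;
el.seq = gs->seq++;                     /* lets the consumer spot lost events */
gs->handler(line_id, &el, gs->data);    /* runs in the provider's irq context */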

> 
> The current implementation I have (not with gpiolib/HTE integration)
> is partially simlar to event handler mechanism except that it does not send data
> with it. See hte-tegra194-irq-test.c in this patch.
> 
> Coming back to gpiolib/hte integration part and your suggestion about
> providing event handler during hte registration. I have below doubts:
> 
> 1. When HTE calls this provided hte_handler, will it store data into
> hte->timestamp_ns directly, I am guessing yes.
> 

This is an implementation detail of the hte/gpiolib interface that I leave
for you to suggest.  Work something out.

> 2. Does hte handler solution create race between two handlers? i.e. edge_irq_handler and
> hte_handler, for the worst case scenario as below?
> 

No.  If hardware timestamp is selected then no irq is requested from the
irq subsystem for that line - only from the hte subsystem instead.
So there will be no edge_irq_handler call for that line, so no possible race.
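
Roughly, the selection in the cdev edge setup would be one branch or the
other, never both - hte_request_ts() and the exact field names are
assumptions, only GPIO_V2_LINE_FLAG_EVENT_CLOCK_HARDWARE comes from this
series:

/*
 * Sketch of the selection being described.  A line uses either the hte
 * path or the irq path, never both, so the two handlers cannot race.
 */
if (flags & GPIO_V2_LINE_FLAG_EVENT_CLOCK_HARDWARE)
        ret = hte_request_ts(chip, line_id, edge_hte_handler,
                             edge_hte_thread, line);
else
        ret = request_threaded_irq(irq, edge_irq_handler, edge_irq_thread,
                                   irqflags, line->req->label, line);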

> 2.a edge_irq_handler runs first, checks some kind of flag to see if
> we are using hardware clock and if yes, directly accesses timestamp_ns
> instead of calling line_event_timestamp.
> 
> 2.b finds timestamp_ns to be invalid since it ran first before hte event handler did.
> 
> 2.c returns and invokes edge_irq_thread.
> 
> 2.c.1 Here, should edge_irq_thread wait in cdev till hte handler to be called? If yes,
> Doesn't it have case where it will wait forever till hte handler gets called, also not
> to mention keeping irq line disabled since IRQF_ONESHOT is specified, could be possible
> when provider has gone rogue?
> 
> 3. I am guessing there will not be dropped event in this suggestion since are
> directly sending data without buffering in HTE, that is the good part I believe.
> 
> 
> >
> > You still have to deal with possible fifo overflows, but if the fifo
> > overflows result in discarding the oldest event, rather than the most
> > recent, then everything comes out in the wash.  If not then the final
> > event in a burst may not correspond to the actual state so you need
> > some additional mechanism to address that.
> > Either way the consumer needs to be aware that events may be lost - but
> > with the event seqno for consumers to detect those lost events we
> > already have that covered.
> 
> Correct (for the seqno part), you already have seqno, cdev does not need
> struct hte_ts_data's u64 seq counter.
> 
> On similar note, I was looking at the linereq_put_event
> function and I have below doubts:
> 
> 1. IIUC, you are discarding oldest data when fifo is full, right?
> 

Correct.

> 2. There is no indication to waiting client if overflow is true beside pr_debug print.
> 
> 2.a Does this not block waiting client infinitely since there is no wake_up_poll call
> in case of overflow == 1?
> 

No - there already was a wake_up_poll call for the entry discarded by
the kfifo_skip().

You dropped 2.b intentionally, right?  Buffer overflow perhaps??

> 2.c If I have missed, what current mechanism cdev provides to client beside seqno
> to indicate there is a drop and if there is a drop, what it does to re-sync?
> 

Just seqno.  Overflows in the cdev event buffer discard the oldest
events, so the final event that the client reads will correspond to
current state. There is an event waiting for the client that, due to
the seqno jump, indicates the overflow.  What else do they need?
And what is there to resync?
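
For reference, a simplified rendering of that discard-oldest behaviour - not
the verbatim gpiolib-cdev source:

/*
 * When the event kfifo is full the oldest entry is skipped before the
 * new one is queued.  No extra wakeup is needed on overflow because the
 * reader was already woken for the entry that has just been discarded;
 * the reader detects the loss from the jump in the event seqno.
 */
bool overflow = false;

if (kfifo_is_full(&lr->events)) {
        overflow = true;
        kfifo_skip(&lr->events);        /* drop the oldest event */
}
kfifo_in(&lr->events, &le, 1);          /* queue the newest event */
if (!overflow)
        wake_up_poll(&lr->wait, EPOLLIN);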

Not sorry if I'm getting short with you here - I'm getting really tired
of this subject as we're clearly not communicating well and are repeatedly
covering the same ground.

Cheers,
Kent.
Kent Gibson Aug. 7, 2021, 4:51 a.m. UTC | #11
On Fri, Aug 06, 2021 at 09:52:54PM -0700, Dipen Patel wrote:
> 
> On 8/6/21 8:07 PM, Kent Gibson wrote:
> > On Fri, Aug 06, 2021 at 07:41:09PM -0700, Dipen Patel wrote:
> >> On 7/31/21 8:43 AM, Kent Gibson wrote:
> >>> On Wed, Jul 28, 2021 at 04:59:08PM -0700, Dipen Patel wrote:
> >>>> Thanks Kent for the review comment. My responses inline.
> >>>>

<snip>

> >
> >> 2. Does hte handler solution create race between two handlers? i.e. edge_irq_handler and
> >> hte_handler, for the worst case scenario as below?
> >>
> > No.  If hardware timestamp is selected then no irq is requested from the
> > irq subsystem for that line - only from the hte subsystem instead.
> > So there will be no edge_irq_handler call for that line, so no possible race.
> 
> That is not possible for certain providers, for example the one I am dealing
> with which requires GPIO line to be requested as input and IRQ needs to
> be enabled on them.
> 

So, for your hte subsystem to work, the consumer has to also request
a line from the irq subsystem?  That makes sense to you?
Have hte do that, rather than the consumer.

And another reason it makes sense to integrate this with irq...

Cheers,
Kent.
Dipen Patel Aug. 7, 2021, 4:52 a.m. UTC | #12
On 8/6/21 8:07 PM, Kent Gibson wrote:
> On Fri, Aug 06, 2021 at 07:41:09PM -0700, Dipen Patel wrote:
>> On 7/31/21 8:43 AM, Kent Gibson wrote:
>>> On Wed, Jul 28, 2021 at 04:59:08PM -0700, Dipen Patel wrote:
>>>> Thanks Kent for the review comment. My responses inline.
>>>>
>>>> On 7/1/21 7:21 AM, Kent Gibson wrote:
>>>>> On Fri, Jun 25, 2021 at 04:55:24PM -0700, Dipen Patel wrote:

>>>>>> Tegra194 device has multiple HTE instances also known as GTE

>>>>>> (Generic hardware Timestamping Engine) which can timestamp subset of

>>>>>> SoC lines/signals. This provider driver focuses on IRQ and GPIO lines

>>>>>> and exposes timestamping ability on those lines to the consumers

>>>>>> through HTE subsystem.

>>>>>>

>>>>>> Also, with this patch, added:

>>>>>> - documentation about this provider and its capabilities at

>>>>>> Documentation/hte.

>>>>>> - Compilation support in Makefile and Kconfig

>>>>>>

>>>>>> Signed-off-by: Dipen Patel <dipenp@nvidia.com>

>>>>>> ---

>>>>>>  Documentation/hte/index.rst        |  21 ++

>>>>>>  Documentation/hte/tegra194-hte.rst |  65 ++++

>>>>>>  Documentation/index.rst            |   1 +

>>>>>>  drivers/hte/Kconfig                |  12 +

>>>>>>  drivers/hte/Makefile               |   1 +

>>>>>>  drivers/hte/hte-tegra194.c         | 554 +++++++++++++++++++++++++++++

>>>>>>  6 files changed, 654 insertions(+)

>>>>>>  create mode 100644 Documentation/hte/index.rst

>>>>>>  create mode 100644 Documentation/hte/tegra194-hte.rst

>>>>>>  create mode 100644 drivers/hte/hte-tegra194.c

>>>>>>

>>>>>> diff --git a/Documentation/hte/index.rst b/Documentation/hte/index.rst

>>>>>> new file mode 100644

>>>>>> index 000000000000..f311ebec6b47

>>>>>> --- /dev/null

>>>>>> +++ b/Documentation/hte/index.rst

>>>>>> @@ -0,0 +1,21 @@

>>>>>> +.. SPDX-License-Identifier: GPL-2.0

>>>>>> +

>>>>>> +============================================

>>>>>> +The Linux Hardware Timestamping Engine (HTE)

>>>>>> +============================================

>>>>>> +

>>>>>> +The HTE Subsystem

>>>>>> +=================

>>>>>> +

>>>>>> +.. toctree::

>>>>>> +   :maxdepth: 1

>>>>>> +

>>>>>> +   hte

>>>>>> +

>>>>>> +HTE Tegra Provider

>>>>>> +==================

>>>>>> +

>>>>>> +.. toctree::

>>>>>> +   :maxdepth: 1

>>>>>> +

>>>>>> +   tegra194-hte

>>>>>> \ No newline at end of file

>>>>>> diff --git a/Documentation/hte/tegra194-hte.rst b/Documentation/hte/tegra194-hte.rst

>>>>>> new file mode 100644

>>>>>> index 000000000000..c23eaafcf080

>>>>>> --- /dev/null

>>>>>> +++ b/Documentation/hte/tegra194-hte.rst

>>>>>> @@ -0,0 +1,65 @@

>>>>>> +HTE Kernel provider driver

>>>>>> +==========================

>>>>>> +

>>>>>> +Description

>>>>>> +-----------

>>>>>> +The Nvidia tegra194 chip has many hardware timestamping engine (HTE) instances

>>>>>> +known as generic timestamping engine (GTE). This provider driver implements

>>>>>> +two GTE instances 1) GPIO GTE and 2) IRQ GTE. The both GTEs instances get the

>>>>>> +timestamp from the system counter TSC which has 31.25MHz clock rate, and the

>>>>>> +driver converts clock tick rate to nano seconds before storing it as timestamp

>>>>>> +value.

>>>>>> +

>>>>>> +GPIO GTE

>>>>>> +--------

>>>>>> +

>>>>>> +This GTE instance help timestamps GPIO in real time, for that to happen GPIO

>>>>>> +needs to be configured as input and IRQ needs to ba enabled as well. The only

>>>>>> +always on (AON) gpio controller instance supports timestamping GPIOs in

>>>>>> +realtime and it has 39 GPIO lines. There is also a dependency on AON GPIO

>>>>>> +controller as it requires very specific bits to be set in GPIO config register.

>>>>>> +It in a way creates cyclic dependency between GTE and GPIO controller. The GTE

>>>>>> +GPIO functionality is accessed from the GPIOLIB. It can support both the in

>>>>>> +kernel and userspace consumers. In the later case, requests go through GPIOLIB

>>>>>> +CDEV framework. The below APIs are added in GPIOLIB framework to access HTE

>>>>>> +subsystem and GPIO GTE for in kernel consumers.

>>>>>> +

>>>>>> +.. c:function:: int gpiod_hw_timestamp_control( struct gpio_desc *desc, bool enable )

>>>>>> +

>>>>>> +	To enable HTE on given GPIO line.

>>>>>> +

>>>>>> +.. c:function:: u64 gpiod_get_hw_timestamp( struct gpio_desc *desc, bool block )

>>>>>> +

>>>>>> +	To retrieve hardwre timestamp in nano seconds.

>>>>>> +

>>>>>> +.. c:function:: bool gpiod_is_hw_timestamp_enabled( const struct gpio_desc *desc )

>>>>>> +

>>>>>> +	To query if HTE is enabled on the given GPIO.

>>>>>> +

>>>>>> +There is hte-tegra194-gpio-test.c, located in ``drivers/hte/`` directory, test

>>>>>> +driver which demonstrates above APIs for the Jetson AGX platform. For userspace

>>>>>> +consumers, GPIO_V2_LINE_FLAG_EVENT_CLOCK_HARDWARE flag must be specifed during

>>>>>> +IOCTL calls, refer ``tools/gpio/gpio-event-mon.c``, which returns the timestamp

>>>>>> +in nano second.

>>>>>> +

>>>>> <snip>

>>>>>

>>>>>> +

>>>>>> +static void tegra_hte_read_fifo(struct tegra_hte_soc *gs)

>>>>>> +{

>>>>>> +	u32 tsh, tsl, src, pv, cv, acv, slice, bit_index, line_id;

>>>>>> +	u64 tsc;

>>>>>> +	int dir;

>>>>>> +	struct hte_ts_data el;

>>>>>> +

>>>>>> +	while ((tegra_hte_readl(gs, HTE_TESTATUS) >>

>>>>>> +		HTE_TESTATUS_OCCUPANCY_SHIFT) &

>>>>>> +		HTE_TESTATUS_OCCUPANCY_MASK) {

>>>>>> +		tsh = tegra_hte_readl(gs, HTE_TETSCH);

>>>>>> +		tsl = tegra_hte_readl(gs, HTE_TETSCL);

>>>>>> +		tsc = (((u64)tsh << 32) | tsl);

>>>>>> +

>>>>>> +		src = tegra_hte_readl(gs, HTE_TESRC);

>>>>>> +		slice = (src >> HTE_TESRC_SLICE_SHIFT) &

>>>>>> +			    HTE_TESRC_SLICE_DEFAULT_MASK;

>>>>>> +

>>>>>> +		pv = tegra_hte_readl(gs, HTE_TEPCV);

>>>>>> +		cv = tegra_hte_readl(gs, HTE_TECCV);

>>>>>> +		acv = pv ^ cv;

>>>>>> +		while (acv) {

>>>>>> +			bit_index = __builtin_ctz(acv);

>>>>>> +			if ((pv >> bit_index) & BIT(0))

>>>>>> +				dir = HTE_EVENT_RISING_EDGE;

>>>>>> +			else

>>>>>> +				dir = HTE_EVENT_FALLING_EDGE;

>>>>>> +

>>>>>> +			line_id = bit_index + (slice << 5);

>>>>>> +			el.dir = dir;

>>>>>> +			el.tsc = tsc << HTE_TS_NS_SHIFT;

>>>>>> +			hte_push_ts_ns_atomic(gs->chip, line_id, &el,

>>>>>> +					      sizeof(el));

>>>>>> +			acv &= ~BIT(bit_index);

>>>>>> +		}

>>>>>> +		tegra_hte_writel(gs, HTE_TECMD, HTE_TECMD_CMD_POP);

>>>>>> +	}

>>>>>> +}

>>>>> What happens when the hte_push_ts_ns_atomic() fails?

>>>>> The timestamp will be quietly dropped?

>>>>> What happens when the interrupt corresponding to that dropped timestamp

>>>>> asks for it?  The irq handler thread will block until it can get a

>>>>> timestamp from the subsequent interrupt?

>>>> Two things happen, 1) at the push, HTE core increments seq counter

>>>>

>>>> 2) If the consumer has provided callback, it will either call that callback

>>>>

>>>> with HTE_TS_DROPPED or HTE_TS_AVAIL. The seq counter gives indirect

>>>>

>>>> view of dropped ts. However, I see the problem with the consumers not

>>>>

>>>> providing callback, in that case, push_ts* API just wakes up process without

>>>>

>>>> indicating why (assuming notify variable is true or else there is a chance for

>>>>

>>>> the thread to block forever). One easy approach I can think of for now is to

>>>>

>>>> make callback mandatory (which is optional right now), I will have to rethink

>>>>

>>>> that scenario and will push corrected version next RFC version.

>>>>

>>>> Thanks for pointing out.

>>>>

>>> I'm not sure you understood my question, which was intended to

>>> demonstrate how an overflow here would break your gpio integration, but I

>>> am certain that I don't understand your answer.

>>>

>>> Using the callback to signal fifo overflow to the consumer is crazy.

>>> If the consumer is too busy to service the fifo then they probably wont

>>> be prepared to deal with the callback either. And the primary purpose of

>>> the fifo is to decouple the producer and consumer, so requiring a callback

>>> defeats the whole purpose of having the fifo there in the first place.

>>>

>>>>> Which brings me back to the concern I have with the approach used in

>>>>> the hte/gpiolib integration - how do you guarantee that the timestamp

>>>>> returned by gpiod_get_hw_timestamp() corresponds to the irq interrupt

>>>>> being handled, particularly in the face of errors such as:

>>>>>  - overflows of the timestamp FIFO in the chip

>>>> I currently do not have any indication mechanism as the providers

>>>>

>>>> I am dealing with right now does not have overflow hardware detection

>>>>

>>>> support. If the chip supports, it should be easy to integrate that feature.

>>>>

>>>> I will provide some hook function or change in push_* API to accommodate

>>>>

>>>> this in next version of RFC.

>>>>

>>>>>  - overflows of software FIFOs as here

>>>> HTE core records a sequence counter and also calls back the consumer with

>>>>

>>>> HTE_TS_DROPPED.

>>>>

>>>>>  - lost interupts (if the hw generates interrupts faster than the CPU

>>>>>    can service them)

>>>> For this, I have no idea unless hardware supports some sort of mechanism

>>>>

>>>> to catch that. For the current providers, as soon as it detects changes on lines

>>>>

>>>> it captures TS in its hw fifo. Its interrupt gets generated based on threshold

>>>>

>>>> set in that hw fifo. This interrupt is different than the lines of actual device

>>>>

>>>> that is why I said I have no idea how we can tackle that. Let me know if there

>>>>

>>>> is any idea or reference of the codes which does tackle this.

>>>>

>>> As far as I am aware there is no solution, given your suggested

>>> architecture.

>>>

>>> Your architecture is inherently fragile, as you try to use one stream

>>> of data (the timestamp fifo) to provide supplementary info for another

>>> (the physical irq).  Guaranteeing that the two are synchronised is

>>> impossible - even if you can get them synced at some point, they can

>>> fall out of sync without any indication.

>>> That is a recipe for Ingenuity flight 6.

>>>

>>> My solution would be to use the hte timestamp fifo as the event source,

>>> rather than the physical irq.  With only one event source the 

>>> synchronisation problem disappears.  As to how to implement that,

>>> gpiolib-cdev would request a line from the hte subsystem and register

>>> and event handler for it, much as it does currently with the irq

>>> subsystem. That event handler would translate the hte events into gpio

>>> events.

>> I have to circle back to here regarding the event handler thing. I

>>

>> surely did not understand fifo as event source rather than physical irq

>>

>> part? I believe you are suggesting to have some kind of buffer abstraction

>>

>> layer for the hardware fifo similar to what I have with software buffer and

>>

>> register handler to that buffer, right?

>>

> No, what is the purpose of that buffering and watermarking in software?

> Just pass the timestamped edge event direct to the consumer.

> Let the consumer do any buffering if necessary, as Jonathon Cameron

> also suggested in the 02/11 thread.

>

>> The current implementation I have (not with gpiolib/HTE integration)

>>

>> is partially similar to the event handler mechanism except that it does not send data

>>

>> with it. See hte-tegra194-irq-test.c in this patch.

>>

>>

>> Coming back to gpiolib/hte integration part and your suggestion about

>>

>> providing event handler during hte registration. I have below doubts:

>>

>> 1. When HTE calls this provided hte_handler, will it store data into

>>

>> hte->timestamp_ns directly, I am guessing yes.

>>

> This is implementation detail of the hte/gpiolib interface that I leave

> for you to suggest.  Work something out.

>

>> 2. Does hte handler solution create race between two handlers? i.e. edge_irq_handler and

>>

>> hte_handler, for the worst case scenario as below?

>>

> No.  If hardware timestamp is selected then no irq is requested from the

> irq subsystem for that line - only from the hte subsystem instead.

> So there will be no edge_irq_handler call for that line, so no possible race.


That is not possible for certain providers, for example the one I am dealing

with which requires GPIO line to be requested as input and IRQ needs to

be enabled on them.

>

>> 2.a edge_irq_handler runs first, checks some kind of flag to see if

>>

>> we are using hardware clock and if yes, directly accesses timestamp_ns

>>

>> instead of calling line_event_timestamp.

>>

>> 2.b finds timestamp_ns to be invalid since it ran first before hte event handler did.

>>

>> 2.c returns and invokes edge_irq_thread.

>>

>> 2.c.1 Here, should edge_irq_thread wait in cdev till hte handler to be called? If yes,

>>

>> Doesn't it have case where it will wait forever till hte handler gets called, also not

>>

>> to mention keeping irq line disabled since IRQF_ONESHOT is specified, could be possible

>>

>> when provider has gone rogue?

>>

>> 3. I am guessing there will not be dropped events in this suggestion since we are

>>

>> directly sending data without buffering in HTE, that is the good part I believe.

>>

>>

>>> You still have to deal with possible fifo overflows, but if the fifo

>>> overflows result in discarding the oldest event, rather than the most

>>> recent, then everything comes out in the wash.  If not then the final

>>> event in a burst may not correspond to the actual state so you need

>>> some additional mechanism to address that.

>>> Either way the consumer needs to be aware that events may be lost - but

>>> with the event seqno for consumers to detect those lost events we

>>> already have that covered.

>> Correct (for the seqno part), you already have seqno, cdev does not need

>>

>> struct hte_ts_data's u64 seq counter.

>>

>>

>> On similar note, I was looking at the linereq_put_event

>>

>> function and I have below doubts:

>>

>> 1. IIUC, you are discarding oldest data when fifo is full, right?

>>

> Correct.

>

>> 2. There is no indication to waiting client if overflow is true beside pr_debug print.

>>

>> 2.a Does this not block waiting client infinitely since there is no wake_up_poll call

>>

>> in case of overflow == 1?

>>

> No - there already was a wake_up_poll call for the entry discarded by

> the kfifo_skip().

>

> You dropped 2.b intentionally, right?  Buffer overflow perhaps??

>

>> 2.c If I have missed, what current mechanism cdev provides to client beside seqno

>>

>> to indicate there is a drop and if there is a drop, what it does to re-sync?

>>

> Just seqno.  Overflows in the cdev event buffer discard the oldest

> events, so the final event that the client reads will correspond to

> current state. There is an event waiting for the client that, due to

> the seqno jump, indicates the overflow.  What else do they need?

> And what is there to resync?

>

> Not sorry if I'm getting short with you here - I'm getting really tired

> of this subject as we're clearly not communicating well and are repeatedly

> covering the same ground.

Sure, no problem...
>

> Cheers,

> Kent.
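
The overflow handling discussed above (discard the oldest event and rely on the
seqno jump to signal the drop) follows the pattern sketched below. This is a
simplified paraphrase of linereq_put_event() in gpiolib-cdev.c rather than the
verbatim code; the struct is only a stand-in for the real linereq, and
initialisation with init_waitqueue_head() and INIT_KFIFO() is omitted::

  #include <linux/kfifo.h>
  #include <linux/poll.h>
  #include <linux/spinlock.h>
  #include <linux/wait.h>
  #include <uapi/linux/gpio.h>

  /* stand-in for the real struct linereq; illustration only */
  struct line_events {
          wait_queue_head_t wait;
          DECLARE_KFIFO(events, struct gpio_v2_line_event, 64);
  };

  static void put_event(struct line_events *lr,
                        struct gpio_v2_line_event *le)
  {
          bool overflow = false;

          spin_lock(&lr->wait.lock);
          if (kfifo_is_full(&lr->events)) {
                  /* drop the oldest event; its wake_up_poll already ran */
                  overflow = true;
                  kfifo_skip(&lr->events);
          }
          kfifo_in(&lr->events, le, 1);
          spin_unlock(&lr->wait.lock);
          /* on overflow the reader detects the gap from the seqno jump */
          if (!overflow)
                  wake_up_poll(&lr->wait, EPOLLIN);
  }
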
Dipen Patel Aug. 7, 2021, 5:35 a.m. UTC | #13
On 8/6/21 9:51 PM, Kent Gibson wrote:
> On Fri, Aug 06, 2021 at 09:52:54PM -0700, Dipen Patel wrote:

>> On 8/6/21 8:07 PM, Kent Gibson wrote:

>>> On Fri, Aug 06, 2021 at 07:41:09PM -0700, Dipen Patel wrote:

>>>> On 7/31/21 8:43 AM, Kent Gibson wrote:

>>>>> On Wed, Jul 28, 2021 at 04:59:08PM -0700, Dipen Patel wrote:

>>>>>> Thanks Kent for the review comment. My responses inline.

>>>>>>

> <snip>

>

>>>> 2. Does hte handler solution create race between two handlers? i.e. edge_irq_handler and

>>>>

>>>> hte_handler, for the worst case scenario as below?

>>>>

>>> No.  If hardware timestamp is selected then no irq is requested from the

>>> irq subsystem for that line - only from the hte subsystem instead.

>>> So there will be no edge_irq_handler call for that line, so no possible race.

>> That is not possible for certain providers, for example the one I am dealing

>>

>> with which requires GPIO line to be requested as input and IRQ needs to

>>

>> be enabled on them.

>>

> So, for your hte subsystem to work, the consumer has to also request

> a line from the irq subsystem?


Yes

>   That makes sense to you?

Its not me, its peculiarity of the hardware that I am dealing with.
> Have hte do that, rather than the consumer.


Sure, for cdev it would mean to duplicate (most of) the edge* or line_create

code in HTE. For such hardware, my initial doubt remains the same about

the worst case scenario between two handlers, but perhaps that's

implementation details for hte to handle.

>

> And another reason it makes sense to integrate this with irq...


Alright, will explore this route as well. I remember both Thierry[1] and

Marc[2] raised some doubts (time to revive that discussion).


[1]: https://lore.kernel.org/lkml/YFm9r%2FtFkzVlYDEp@orome.fritz.box/

[2]: https://lore.kernel.org/lkml/87h7l1k9yi.wl-maz@kernel.org/

>

> Cheers,

> Kent.
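
To make the constraint under discussion concrete, an in-kernel consumer under
this RFC ends up doing roughly the following: request the GPIO as an input,
request its IRQ, then enable and read back hardware timestamps with the gpiod
helpers documented in tegra194-hte.rst below. This is only a sketch: the two
gpiod_*_hw_timestamp* helpers are the ones proposed earlier in this series (not
mainline APIs), and the "test" con_id, trigger flags and device wiring are
illustrative::

  #include <linux/err.h>
  #include <linux/gpio/consumer.h>
  #include <linux/interrupt.h>
  #include <linux/printk.h>

  static irqreturn_t test_gpio_thread_fn(int irq, void *data)
  {
          struct gpio_desc *desc = data;
          u64 ts;

          /* block until the GTE timestamp for this edge is available */
          ts = gpiod_get_hw_timestamp(desc, true);
          pr_info("edge timestamp: %llu ns\n", ts);

          return IRQ_HANDLED;
  }

  static int test_gpio_setup(struct device *dev)
  {
          struct gpio_desc *desc;
          int irq, ret;

          /* "test" con_id is illustrative only */
          desc = devm_gpiod_get(dev, "test", GPIOD_IN);
          if (IS_ERR(desc))
                  return PTR_ERR(desc);

          /* the AON GPIO GTE needs the line as input with its IRQ enabled */
          irq = gpiod_to_irq(desc);
          if (irq < 0)
                  return irq;

          ret = devm_request_threaded_irq(dev, irq, NULL, test_gpio_thread_fn,
                                          IRQF_TRIGGER_RISING | IRQF_ONESHOT,
                                          "hte-gpio-test", desc);
          if (ret)
                  return ret;

          /* helper proposed by this series */
          return gpiod_hw_timestamp_control(desc, true);
  }
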
Kent Gibson Aug. 7, 2021, 5:42 a.m. UTC | #14
On Fri, Aug 06, 2021 at 10:35:10PM -0700, Dipen Patel wrote:
> 
> On 8/6/21 9:51 PM, Kent Gibson wrote:
> > On Fri, Aug 06, 2021 at 09:52:54PM -0700, Dipen Patel wrote:
> >> On 8/6/21 8:07 PM, Kent Gibson wrote:
> >>> On Fri, Aug 06, 2021 at 07:41:09PM -0700, Dipen Patel wrote:
> >>>> On 7/31/21 8:43 AM, Kent Gibson wrote:
> >>>>> On Wed, Jul 28, 2021 at 04:59:08PM -0700, Dipen Patel wrote:
> >>>>>> Thanks Kent for the review comment. My responses inline.
> >>>>>>
> > <snip>
> >
> >>>> 2. Does hte handler solution create race between two handlers? i.e. edge_irq_handler and
> >>>>
> >>>> hte_handler, for the worst case scenario as below?
> >>>>
> >>> No.  If hardware timestamp is selected then no irq is requested from the
> >>> irq subsystem for that line - only from the hte subsystem instead.
> >>> So there will be no edge_irq_handler call for that line, so no possible race.
> >> That is not possible for certain providers, for example the one I am dealing
> >>
> >> with which requires GPIO line to be requested as input and IRQ needs to
> >>
> >> be enabled on them.
> >>
> > So, for your hte subsystem to work, the consumer has to also request
> > a line from the irq subsystem?
> 
> Yes
> 
> >   That makes sense to you?
> Its not me, its peculiarity of the hardware that I am dealing with.

My point is that the peculiarities of the hardware should be hidden from
the hte API user, especially if it is only necessary for some hardware.

> > Have hte do that, rather than the consumer.
> 
> Sure, for cdev it would mean to duplicate (most of) the edge* or line_create
> 
> code in HTE.

And your current way every other hte user will have to duplicate the
gpiolib-cdev code....

> For such hardware, my initial doubt remains the same about
> 
> the worst case scenario between two handlers, but perhaps that's
> 
> implementation details for hte to handle.
> 

Indeed.

Cheers,
Kent.

> >
> > And another reason it makes sense to integrate this with irq...
> 
> Alright, will explore this route as well. I remember both Thierry[1] and
> 
> Marc[2] raised some doubts (time to revive that discussion).
> 
> 
> [1]: https://lore.kernel.org/lkml/YFm9r%2FtFkzVlYDEp@orome.fritz.box/
> 
> [2]: https://lore.kernel.org/lkml/87h7l1k9yi.wl-maz@kernel.org/
> 
> >
> > Cheers,
> > Kent.
Dipen Patel Aug. 7, 2021, 5:47 a.m. UTC | #15
On 8/6/21 10:35 PM, Dipen Patel wrote:
> On 8/6/21 9:51 PM, Kent Gibson wrote:

>> On Fri, Aug 06, 2021 at 09:52:54PM -0700, Dipen Patel wrote:

>>> On 8/6/21 8:07 PM, Kent Gibson wrote:

>>>> On Fri, Aug 06, 2021 at 07:41:09PM -0700, Dipen Patel wrote:

>>>>> On 7/31/21 8:43 AM, Kent Gibson wrote:

>>>>>> On Wed, Jul 28, 2021 at 04:59:08PM -0700, Dipen Patel wrote:

>>>>>>> Thanks Kent for the review comment. My responses inline.

>>>>>>>

>> <snip>

>>

>>>>> 2. Does hte handler solution create race between two handlers? i.e. edge_irq_handler and

>>>>>

>>>>> hte_handler, for the worst case scenario as below?

>>>>>

>>>> No.  If hardware timestamp is selected then no irq is requested from the

>>>> irq subsystem for that line - only from the hte subsystem instead.

>>>> So there will be no edge_irq_handler call for that line, so no possible race.

>>> That is not possible for certain providers, for example the one I am dealing

>>>

>>> with which requires GPIO line to be requested as input and IRQ needs to

>>>

>>> be enabled on them.

>>>

>> So, for your hte subsystem to work, the consumer has to also request

>> a line from the irq subsystem?

> Yes

>

>>   That makes sense to you?

> Its not me, its peculiarity of the hardware that I am dealing with.

>> Have hte do that, rather than the consumer.

> Sure, for cdev it would mean to duplicate (most of) the edge* or line_create

>

> code in HTE.


Ignore code duplicate comment, shouldn't be big deal.


> For such hardware, my initial doubt remains the same about

>

> the worst case scenario between two handlers, but perhaps that's

>

> implementation details for hte to handle.

>

>> And another reason it makes sense to integrate this with irq...

> Alright, will explore this route as well. I remember both Thierry[1] and

>

> Marc[2] raised some doubts (time to revive that discussion).

>

>

> [1]: https://lore.kernel.org/lkml/YFm9r%2FtFkzVlYDEp@orome.fritz.box/

>

> [2]: https://lore.kernel.org/lkml/87h7l1k9yi.wl-maz@kernel.org/

>

>> Cheers,

>> Kent.
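
For the userspace path referred to throughout this thread (gpio-event-mon with
the hardware event clock), the request boils down to something like the sketch
below against the GPIO v2 character-device uAPI.
GPIO_V2_LINE_FLAG_EVENT_CLOCK_HARDWARE is the flag introduced by this series,
so this only builds against the patched headers; the chip path and line offset
are placeholders::

  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/ioctl.h>
  #include <linux/gpio.h>

  int main(void)
  {
          struct gpio_v2_line_request req;
          struct gpio_v2_line_event event;
          int cfd;

          cfd = open("/dev/gpiochip0", O_RDONLY);  /* placeholder chip */
          if (cfd < 0)
                  return 1;

          memset(&req, 0, sizeof(req));
          req.offsets[0] = 4;                      /* placeholder offset */
          req.num_lines = 1;
          strcpy(req.consumer, "hte-monitor");
          req.config.flags = GPIO_V2_LINE_FLAG_INPUT |
                             GPIO_V2_LINE_FLAG_EDGE_RISING |
                             GPIO_V2_LINE_FLAG_EDGE_FALLING |
                             GPIO_V2_LINE_FLAG_EVENT_CLOCK_HARDWARE;

          if (ioctl(cfd, GPIO_V2_GET_LINE_IOCTL, &req) < 0)
                  return 1;

          /* each read yields one edge event; timestamp_ns comes from the GTE */
          while (read(req.fd, &event, sizeof(event)) == sizeof(event))
                  printf("offset %u %s edge at %llu ns, seqno %u\n",
                         event.offset,
                         event.id == GPIO_V2_LINE_EVENT_RISING_EDGE ?
                                      "rising" : "falling",
                         (unsigned long long)event.timestamp_ns,
                         event.seqno);

          close(cfd);
          return 0;
  }
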
diff mbox series

Patch

diff --git a/Documentation/hte/index.rst b/Documentation/hte/index.rst
new file mode 100644
index 000000000000..f311ebec6b47
--- /dev/null
+++ b/Documentation/hte/index.rst
@@ -0,0 +1,21 @@ 
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================
+The Linux Hardware Timestamping Engine (HTE)
+============================================
+
+The HTE Subsystem
+=================
+
+.. toctree::
+   :maxdepth: 1
+
+   hte
+
+HTE Tegra Provider
+==================
+
+.. toctree::
+   :maxdepth: 1
+
+   tegra194-hte
\ No newline at end of file
diff --git a/Documentation/hte/tegra194-hte.rst b/Documentation/hte/tegra194-hte.rst
new file mode 100644
index 000000000000..c23eaafcf080
--- /dev/null
+++ b/Documentation/hte/tegra194-hte.rst
@@ -0,0 +1,65 @@ 
+HTE Kernel provider driver
+==========================
+
+Description
+-----------
+The Nvidia Tegra194 chip has many hardware timestamping engine (HTE) instances
+known as generic timestamping engines (GTE). This provider driver implements
+two GTE instances: 1) GPIO GTE and 2) IRQ GTE. Both GTE instances get their
+timestamps from the system counter TSC, which runs at 31.25 MHz, and the
+driver converts the clock ticks to nanoseconds before storing the timestamp
+value.
+
+GPIO GTE
+--------
+
+This GTE instance timestamps GPIOs in real time. For that to happen, the GPIO
+needs to be configured as an input and its IRQ needs to be enabled as well.
+Only the always-on (AON) GPIO controller instance supports timestamping GPIOs
+in real time and it has 39 GPIO lines. There is also a dependency on the AON
+GPIO controller as it requires very specific bits to be set in the GPIO config
+register. This in a way creates a cyclic dependency between the GTE and the
+GPIO controller. The GTE GPIO functionality is accessed through GPIOLIB. It
+supports both in-kernel and userspace consumers. In the latter case, requests
+go through the GPIOLIB CDEV framework. The APIs below are added to GPIOLIB to
+access the HTE subsystem and GPIO GTE for in-kernel consumers.
+
+.. c:function:: int gpiod_hw_timestamp_control( struct gpio_desc *desc, bool enable )
+
+	To enable HTE on a given GPIO line.
+
+.. c:function:: u64 gpiod_get_hw_timestamp( struct gpio_desc *desc, bool block )
+
+	To retrieve the hardware timestamp in nanoseconds.
+
+.. c:function:: bool gpiod_is_hw_timestamp_enabled( const struct gpio_desc *desc )
+
+	To query if HTE is enabled on the given GPIO.
+
+There is a test driver, hte-tegra194-gpio-test.c, located in the
+``drivers/hte/`` directory, which demonstrates the above APIs on the Jetson
+AGX platform. For userspace consumers, the GPIO_V2_LINE_FLAG_EVENT_CLOCK_HARDWARE
+flag must be specified during the IOCTL calls; see
+``tools/gpio/gpio-event-mon.c``, which returns the timestamp in nanoseconds.
+
+IRQ GTE
+--------
+
+This GTE instance timestamps LIC IRQ lines in real time. There are 352 IRQ
+lines which this instance can timestamp in real time. The hte devicetree
+binding described at ``Documentation/devicetree/bindings/hte/`` gives an
+example of how a consumer can request an IRQ line; since it is a one-to-one
+mapping, consumers can simply specify the IRQ number they are interested in.
+There is no userspace consumer support for this GTE instance. The sample test
+code hte-tegra194-irq-test.c, located in the ``drivers/hte/`` directory,
+demonstrates how to use the IRQ GTE instance. Below is a sample device tree
+snippet for the test driver::
+
+ tegra_hte_irq_test {
+        compatible = "nvidia,tegra194-hte-irq-test";
+        htes = <&tegra_hte_lic 0x19>;
+        hte-names = "hte-lic";
+ };
+
+The source code of the driver for both the IRQ and GPIO GTE instances is
+located at ``drivers/hte/hte-tegra194.c``.
\ No newline at end of file
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 1b13c2445e87..b41118577fe6 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -138,6 +138,7 @@  needed).
    misc-devices/index
    scheduler/index
    mhi/index
+   hte/index
 
 Architecture-agnostic documentation
 -----------------------------------
diff --git a/drivers/hte/Kconfig b/drivers/hte/Kconfig
index 394e112f7dfb..f7b01fcc7190 100644
--- a/drivers/hte/Kconfig
+++ b/drivers/hte/Kconfig
@@ -20,3 +20,15 @@  menuconfig HTE
 
           If unsure, say no.
 
+if HTE
+
+config HTE_TEGRA194
+	tristate "NVIDIA Tegra194 HTE Support"
+	depends on ARCH_TEGRA_194_SOC
+	help
+	  Enable this option for the integrated hardware timestamping engine,
+	  also known as the generic timestamping engine (GTE), on NVIDIA
+	  Tegra194 systems-on-chip. The driver supports timestamping 352 LIC
+	  IRQ lines and 39 AON GPIO lines in real time.
+
+endif
diff --git a/drivers/hte/Makefile b/drivers/hte/Makefile
index 9899dbe516f7..52f978cfc913 100644
--- a/drivers/hte/Makefile
+++ b/drivers/hte/Makefile
@@ -1 +1,2 @@ 
 obj-$(CONFIG_HTE)		+= hte.o
+obj-$(CONFIG_HTE_TEGRA194)	+= hte-tegra194.o
\ No newline at end of file
diff --git a/drivers/hte/hte-tegra194.c b/drivers/hte/hte-tegra194.c
new file mode 100644
index 000000000000..8ad10efd3641
--- /dev/null
+++ b/drivers/hte/hte-tegra194.c
@@ -0,0 +1,554 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 NVIDIA Corporation
+ *
+ * Author: Dipen Patel <dipenp@nvidia.com>
+ */
+
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/hte.h>
+#include <linux/uaccess.h>
+
+#define HTE_SUSPEND	0
+
+/* HTE source clock TSC is 31.25MHz */
+#define HTE_TS_CLK_RATE_HZ	31250000ULL
+#define HTE_CLK_RATE_NS		32
+#define HTE_TS_NS_SHIFT	__builtin_ctz(HTE_CLK_RATE_NS)
+
+#define NV_AON_SLICE_INVALID	-1
+
+/* AON HTE line map For slice 1 */
+#define NV_AON_HTE_SLICE1_IRQ_GPIO_28	12
+#define NV_AON_HTE_SLICE1_IRQ_GPIO_29	13
+
+/* AON HTE line map For slice 2 */
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_0	0
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_1	1
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_2	2
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_3	3
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_4	4
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_5	5
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_6	6
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_7	7
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_8	8
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_9	9
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_10	10
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_11	11
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_12	12
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_13	13
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_14	14
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_15	15
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_16	16
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_17	17
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_18	18
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_19	19
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_20	20
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_21	21
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_22	22
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_23	23
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_24	24
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_25	25
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_26	26
+#define NV_AON_HTE_SLICE2_IRQ_GPIO_27	27
+
+/* AON GPIO port AA pins */
+#define NV_AON_GPIO_PORT_AA_0		0
+#define NV_AON_GPIO_PORT_AA_1		1
+#define NV_AON_GPIO_PORT_AA_2		2
+#define NV_AON_GPIO_PORT_AA_3		3
+#define NV_AON_GPIO_PORT_AA_4		4
+#define NV_AON_GPIO_PORT_AA_5		5
+#define NV_AON_GPIO_PORT_AA_6		6
+#define NV_AON_GPIO_PORT_AA_7		7
+
+/* AON GPIO port BB pins */
+#define NV_AON_GPIO_PORT_BB_0		8
+#define NV_AON_GPIO_PORT_BB_1		9
+#define NV_AON_GPIO_PORT_BB_2		10
+#define NV_AON_GPIO_PORT_BB_3		11
+
+/* AON GPIO port CC pins */
+#define NV_AON_GPIO_PORT_CC_0		16
+#define NV_AON_GPIO_PORT_CC_1		17
+#define NV_AON_GPIO_PORT_CC_2		18
+#define NV_AON_GPIO_PORT_CC_3		19
+#define NV_AON_GPIO_PORT_CC_4		20
+#define NV_AON_GPIO_PORT_CC_5		21
+#define NV_AON_GPIO_PORT_CC_6		22
+#define NV_AON_GPIO_PORT_CC_7		23
+
+/* AON GPIO port DD pins */
+#define NV_AON_GPIO_PORT_DD_0		24
+#define NV_AON_GPIO_PORT_DD_1		25
+#define NV_AON_GPIO_PORT_DD_2		26
+
+/* AON GPIO port EE pins */
+#define NV_AON_GPIO_PORT_EE_0		32
+#define NV_AON_GPIO_PORT_EE_1		33
+#define NV_AON_GPIO_PORT_EE_2		34
+#define NV_AON_GPIO_PORT_EE_3		35
+#define NV_AON_GPIO_PORT_EE_4		36
+#define NV_AON_GPIO_PORT_EE_5		37
+#define NV_AON_GPIO_PORT_EE_6		38
+
+
+#define HTE_TECTRL		0x0
+#define HTE_TETSCH		0x4
+#define HTE_TETSCL		0x8
+#define HTE_TESRC		0xC
+#define HTE_TECCV		0x10
+#define HTE_TEPCV		0x14
+#define HTE_TECMD		0x1C
+#define HTE_TESTATUS		0x20
+#define HTE_SLICE0_TETEN	0x40
+#define HTE_SLICE1_TETEN	0x60
+
+#define HTE_SLICE_SIZE		(HTE_SLICE1_TETEN - HTE_SLICE0_TETEN)
+
+#define HTE_TECTRL_ENABLE_ENABLE	0x1
+
+#define HTE_TECTRL_OCCU_SHIFT		0x8
+#define HTE_TECTRL_INTR_SHIFT		0x1
+#define HTE_TECTRL_INTR_ENABLE		0x1
+
+#define HTE_TESRC_SLICE_SHIFT		16
+#define HTE_TESRC_SLICE_DEFAULT_MASK	0xFF
+
+#define HTE_TECMD_CMD_POP		0x1
+
+#define HTE_TESTATUS_OCCUPANCY_SHIFT	8
+#define HTE_TESTATUS_OCCUPANCY_MASK	0xFF
+
+struct hte_slices {
+	u32 r_val;
+	unsigned long flags;
+	/* to prevent lines mapped to same slice updating its register */
+	spinlock_t s_lock;
+};
+
+struct tegra_hte_line_mapped {
+	int slice;
+	u32 bit_index;
+};
+
+struct tegra_hte_line_table {
+	int map_sz;
+	const struct tegra_hte_line_mapped *map;
+};
+
+struct tegra_hte_soc {
+	int hte_irq;
+	u32 itr_thrshld;
+	u32 conf_rval;
+	struct hte_slices *sl;
+	const struct tegra_hte_line_table *line_map;
+	struct hte_chip *chip;
+	void __iomem *regs;
+};
+
+static const struct tegra_hte_line_mapped tegra194_aon_gpio_map[] = {
+	/* gpio, slice, bit_index */
+	[NV_AON_GPIO_PORT_AA_0]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_11},
+	[NV_AON_GPIO_PORT_AA_1]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_10},
+	[NV_AON_GPIO_PORT_AA_2]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_9},
+	[NV_AON_GPIO_PORT_AA_3]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_8},
+	[NV_AON_GPIO_PORT_AA_4]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_7},
+	[NV_AON_GPIO_PORT_AA_5]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_6},
+	[NV_AON_GPIO_PORT_AA_6]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_5},
+	[NV_AON_GPIO_PORT_AA_7]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_4},
+	[NV_AON_GPIO_PORT_BB_0]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_3},
+	[NV_AON_GPIO_PORT_BB_1]  = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_2},
+	[NV_AON_GPIO_PORT_BB_2] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_1},
+	[NV_AON_GPIO_PORT_BB_3] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_0},
+	[12] = {NV_AON_SLICE_INVALID, 0},
+	[13] = {NV_AON_SLICE_INVALID, 0},
+	[14] = {NV_AON_SLICE_INVALID, 0},
+	[15] = {NV_AON_SLICE_INVALID, 0},
+	[NV_AON_GPIO_PORT_CC_0] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_22},
+	[NV_AON_GPIO_PORT_CC_1] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_21},
+	[NV_AON_GPIO_PORT_CC_2] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_20},
+	[NV_AON_GPIO_PORT_CC_3] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_19},
+	[NV_AON_GPIO_PORT_CC_4] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_18},
+	[NV_AON_GPIO_PORT_CC_5] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_17},
+	[NV_AON_GPIO_PORT_CC_6] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_16},
+	[NV_AON_GPIO_PORT_CC_7] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_15},
+	[NV_AON_GPIO_PORT_DD_0] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_14},
+	[NV_AON_GPIO_PORT_DD_1] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_13},
+	[NV_AON_GPIO_PORT_DD_2] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_12},
+	[27] = {NV_AON_SLICE_INVALID, 0},
+	[28] = {NV_AON_SLICE_INVALID, 0},
+	[29] = {NV_AON_SLICE_INVALID, 0},
+	[30] = {NV_AON_SLICE_INVALID, 0},
+	[31] = {NV_AON_SLICE_INVALID, 0},
+	[NV_AON_GPIO_PORT_EE_0] = {1, NV_AON_HTE_SLICE1_IRQ_GPIO_29},
+	[NV_AON_GPIO_PORT_EE_1] = {1, NV_AON_HTE_SLICE1_IRQ_GPIO_28},
+	[NV_AON_GPIO_PORT_EE_2] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_27},
+	[NV_AON_GPIO_PORT_EE_3] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_26},
+	[NV_AON_GPIO_PORT_EE_4] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_25},
+	[NV_AON_GPIO_PORT_EE_5] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_24},
+	[NV_AON_GPIO_PORT_EE_6] = {2, NV_AON_HTE_SLICE2_IRQ_GPIO_23},
+};
+
+static const struct tegra_hte_line_table aon_hte_map = {
+	.map_sz = ARRAY_SIZE(tegra194_aon_gpio_map),
+	.map = tegra194_aon_gpio_map,
+};
+
+static inline u32 tegra_hte_readl(struct tegra_hte_soc *hte, u32 reg)
+{
+	return readl(hte->regs + reg);
+}
+
+static inline void tegra_hte_writel(struct tegra_hte_soc *hte, u32 reg,
+				    u32 val)
+{
+	writel(val, hte->regs + reg);
+}
+
+static inline int tegra_hte_map_to_line_id(u32 eid, struct tegra_hte_soc *gs,
+					  u32 *mapped)
+{
+	const struct tegra_hte_line_mapped *m;
+
+	if (gs->line_map) {
+		m = gs->line_map->map;
+		if (eid >= gs->line_map->map_sz)
+			return -EINVAL;
+		if (m[eid].slice == NV_AON_SLICE_INVALID)
+			return -EINVAL;
+
+		*mapped = (m[eid].slice << 5) + m[eid].bit_index;
+	} else {
+		*mapped = eid;
+	}
+
+	return 0;
+}
+
+static int tegra_hte_line_xlate(struct hte_chip *gc,
+				 const struct of_phandle_args *args,
+				 struct hte_ts_desc *desc, u32 *xlated_id)
+{
+	int ret = 0;
+
+	if (!gc || !desc || !xlated_id)
+		return -EINVAL;
+
+	if (args) {
+		if (gc->of_hte_n_cells < 1)
+			return -EINVAL;
+
+		if (args->args_count != gc->of_hte_n_cells)
+			return -EINVAL;
+
+		desc->con_id = args->args[0];
+	}
+
+	ret = tegra_hte_map_to_line_id(desc->con_id, gc->data,
+				       xlated_id);
+	if (ret < 0) {
+		dev_dbg(gc->dev, "con_id:%u mapping failed\n",
+			desc->con_id);
+		return ret;
+	}
+
+	if (*xlated_id >= gc->nlines)
+		return -EINVAL;
+
+	dev_dbg(gc->dev, "requested id:%u, xlated id:%u\n",
+		desc->con_id, *xlated_id);
+
+	return 0;
+}
+
+static int tegra_hte_en_dis_common(struct hte_chip *chip, u32 line_id, bool en)
+{
+	u32 slice, sl_bit_shift, line_bit, val, reg;
+	struct tegra_hte_soc *gs;
+
+	sl_bit_shift = __builtin_ctz(HTE_SLICE_SIZE);
+
+	if (!chip)
+		return -EINVAL;
+
+	gs = (struct tegra_hte_soc *)chip->data;
+
+	if (line_id >= chip->nlines) {
+		dev_err(chip->dev,
+			"line id: %u is not supported by this controller\n",
+			line_id);
+		return -EINVAL;
+	}
+
+	slice = line_id >> sl_bit_shift;
+	line_bit = line_id & (HTE_SLICE_SIZE - 1);
+	reg = (slice << sl_bit_shift) + HTE_SLICE0_TETEN;
+
+	spin_lock(&gs->sl[slice].s_lock);
+
+	if (test_bit(HTE_SUSPEND, &gs->sl[slice].flags)) {
+		spin_unlock(&gs->sl[slice].s_lock);
+		dev_dbg(chip->dev, "device suspended");
+		return -EBUSY;
+	}
+
+	val = tegra_hte_readl(gs, reg);
+	if (en)
+		val = val | (1 << line_bit);
+	else
+		val = val & (~(1 << line_bit));
+	tegra_hte_writel(gs, reg, val);
+
+	spin_unlock(&gs->sl[slice].s_lock);
+
+	dev_dbg(chip->dev, "line: %u, slice %u, line_bit %u, reg:0x%x\n",
+		line_id, slice, line_bit, reg);
+
+	return 0;
+}
+
+static int tegra_hte_request(struct hte_chip *chip, u32 line_id)
+{
+	return tegra_hte_en_dis_common(chip, line_id, true);
+}
+
+static int tegra_hte_release(struct hte_chip *chip, u32 line_id)
+{
+	return tegra_hte_en_dis_common(chip, line_id, false);
+}
+
+static int tegra_hte_clk_src_info(struct hte_chip *chip,
+				  struct hte_clk_info *ci)
+{
+	(void)chip;
+
+	ci->hz = HTE_TS_CLK_RATE_HZ;
+	ci->type = CLOCK_MONOTONIC;
+
+	return 0;
+}
+
+static void tegra_hte_read_fifo(struct tegra_hte_soc *gs)
+{
+	u32 tsh, tsl, src, pv, cv, acv, slice, bit_index, line_id;
+	u64 tsc;
+	int dir;
+	struct hte_ts_data el;
+
+	while ((tegra_hte_readl(gs, HTE_TESTATUS) >>
+		HTE_TESTATUS_OCCUPANCY_SHIFT) &
+		HTE_TESTATUS_OCCUPANCY_MASK) {
+		tsh = tegra_hte_readl(gs, HTE_TETSCH);
+		tsl = tegra_hte_readl(gs, HTE_TETSCL);
+		tsc = (((u64)tsh << 32) | tsl);
+
+		src = tegra_hte_readl(gs, HTE_TESRC);
+		slice = (src >> HTE_TESRC_SLICE_SHIFT) &
+			    HTE_TESRC_SLICE_DEFAULT_MASK;
+
+		pv = tegra_hte_readl(gs, HTE_TEPCV);
+		cv = tegra_hte_readl(gs, HTE_TECCV);
+		acv = pv ^ cv;
+		while (acv) {
+			bit_index = __builtin_ctz(acv);
+			if ((pv >> bit_index) & BIT(0))
+				dir = HTE_EVENT_RISING_EDGE;
+			else
+				dir = HTE_EVENT_FALLING_EDGE;
+
+			line_id = bit_index + (slice << 5);
+			el.dir = dir;
+			el.tsc = tsc << HTE_TS_NS_SHIFT;
+			hte_push_ts_ns_atomic(gs->chip, line_id, &el,
+					      sizeof(el));
+			acv &= ~BIT(bit_index);
+		}
+		tegra_hte_writel(gs, HTE_TECMD, HTE_TECMD_CMD_POP);
+	}
+}
+
+static irqreturn_t tegra_hte_isr(int irq, void *dev_id)
+{
+	struct tegra_hte_soc *gs = dev_id;
+
+	tegra_hte_read_fifo(gs);
+
+	return IRQ_HANDLED;
+}
+
+static const struct of_device_id tegra_hte_of_match[] = {
+	{ .compatible = "nvidia,tegra194-gte-lic"},
+	{ .compatible = "nvidia,tegra194-gte-aon", .data = &aon_hte_map},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, tegra_hte_of_match);
+
+static const struct hte_ops g_ops = {
+	.request = tegra_hte_request,
+	.release = tegra_hte_release,
+	.enable = tegra_hte_request,
+	.disable = tegra_hte_release,
+	.get_clk_src_info = tegra_hte_clk_src_info,
+};
+
+static int tegra_hte_probe(struct platform_device *pdev)
+{
+	int ret;
+	u32 i, slices, val = 0;
+	struct device *dev;
+	struct tegra_hte_soc *hte_dev;
+	struct hte_chip *gc;
+
+	dev = &pdev->dev;
+
+	hte_dev = devm_kzalloc(dev, sizeof(*hte_dev), GFP_KERNEL);
+	if (!hte_dev)
+		return -ENOMEM;
+
+	gc = devm_kzalloc(dev, sizeof(*gc), GFP_KERNEL);
+	if (!gc)
+		return -ENOMEM;
+
+	dev_set_drvdata(&pdev->dev, hte_dev);
+	hte_dev->line_map = of_device_get_match_data(&pdev->dev);
+
+	hte_dev->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(hte_dev->regs))
+		return PTR_ERR(hte_dev->regs);
+
+	ret = of_property_read_u32(dev->of_node, "int-threshold",
+				   &hte_dev->itr_thrshld);
+	if (ret != 0)
+		hte_dev->itr_thrshld = 1;
+
+	ret = of_property_read_u32(dev->of_node, "slices", &slices);
+	if (ret != 0) {
+		dev_err(dev, "Could not read slices\n");
+		return -EINVAL;
+	}
+
+	hte_dev->sl = devm_kzalloc(dev, sizeof(struct hte_slices) * slices,
+				   GFP_KERNEL);
+	if (!hte_dev->sl)
+		return -ENOMEM;
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0) {
+		dev_err(dev, "get irq failed.\n");
+		return ret;
+	}
+	hte_dev->hte_irq = ret;
+	ret = devm_request_irq(dev, hte_dev->hte_irq, tegra_hte_isr, 0,
+			       dev_name(dev), hte_dev);
+	if (ret < 0) {
+		dev_err(dev, "request irq failed.\n");
+		return ret;
+	}
+
+	gc->nlines = slices << 5;
+	gc->ops = &g_ops;
+	gc->dev = dev;
+	hte_dev->chip = gc;
+	gc->data = (void *)hte_dev;
+	gc->xlate = tegra_hte_line_xlate;
+	gc->of_hte_n_cells = 1;
+
+	ret = hte_register_chip(hte_dev->chip);
+
+	if (ret)
+		dev_err(gc->dev, "hte chip register failed");
+
+	for (i = 0; i < slices; i++) {
+		hte_dev->sl[i].flags = 0;
+		spin_lock_init(&hte_dev->sl[i].s_lock);
+	}
+
+	val = HTE_TECTRL_ENABLE_ENABLE |
+	      (HTE_TECTRL_INTR_ENABLE << HTE_TECTRL_INTR_SHIFT) |
+	      (hte_dev->itr_thrshld << HTE_TECTRL_OCCU_SHIFT);
+	tegra_hte_writel(hte_dev, HTE_TECTRL, val);
+
+	dev_dbg(gc->dev, "lines: %d, slices:%d", gc->nlines, slices);
+	return 0;
+}
+
+static int tegra_hte_remove(struct platform_device *pdev)
+{
+	struct tegra_hte_soc *gs = dev_get_drvdata(&pdev->dev);
+
+	tegra_hte_writel(gs, HTE_TECTRL, 0);
+
+	return hte_unregister_chip(gs->chip);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int tegra_hte_resume_early(struct device *dev)
+{
+	u32 i;
+	struct tegra_hte_soc *gs = dev_get_drvdata(dev);
+	u32 slices = gs->chip->nlines >> 5;
+	u32 sl_bit_shift = __builtin_ctz(HTE_SLICE_SIZE);
+
+	tegra_hte_writel(gs, HTE_TECTRL, gs->conf_rval);
+
+	for (i = 0; i < slices; i++) {
+		spin_lock(&gs->sl[i].s_lock);
+		tegra_hte_writel(gs,
+				 ((i << sl_bit_shift) + HTE_SLICE0_TETEN),
+				 gs->sl[i].r_val);
+		clear_bit(HTE_SUSPEND, &gs->sl[i].flags);
+		spin_unlock(&gs->sl[i].s_lock);
+	}
+
+	return 0;
+}
+
+static int tegra_hte_suspend_late(struct device *dev)
+{
+	u32 i;
+	struct tegra_hte_soc *gs = dev_get_drvdata(dev);
+	u32 slices = gs->chip->nlines >> 5;
+	u32 sl_bit_shift = __builtin_ctz(HTE_SLICE_SIZE);
+
+	gs->conf_rval = tegra_hte_readl(gs, HTE_TECTRL);
+	for (i = 0; i < slices; i++) {
+		spin_lock(&gs->sl[i].s_lock);
+		gs->sl[i].r_val = tegra_hte_readl(gs,
+				((i << sl_bit_shift) + HTE_SLICE0_TETEN));
+		set_bit(HTE_SUSPEND, &gs->sl[i].flags);
+		spin_unlock(&gs->sl[i].s_lock);
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops tegra_hte_pm = {
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(tegra_hte_suspend_late,
+				     tegra_hte_resume_early)
+};
+
+static struct platform_driver tegra_hte_driver = {
+	.probe = tegra_hte_probe,
+	.remove = tegra_hte_remove,
+	.driver = {
+		.name = "tegra_hte",
+		.pm = &tegra_hte_pm,
+		.of_match_table = tegra_hte_of_match,
+	},
+};
+
+module_platform_driver(tegra_hte_driver);
+
+MODULE_AUTHOR("Dipen Patel <dipenp@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA Tegra HTE (Hardware Timestamping Engine) driver");
+MODULE_LICENSE("GPL v2");
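
As a worked example of the arithmetic in tegra_hte_read_fifo() and
tegra_hte_en_dis_common() above: each slice carries 32 lines, so a
(slice, bit) pair encodes to a line id as slice * 32 + bit and decodes back
with a shift and a mask, while the 31.25 MHz TSC count is converted to
nanoseconds by the << HTE_TS_NS_SHIFT, i.e. a multiplication by 32 since one
tick is exactly 32 ns. A standalone sketch of just that math, with arbitrary
example values::

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
          /* encode: slice 2, bit 11 -> line id 75 (e.g. NV_AON_GPIO_PORT_AA_0) */
          unsigned int slice = 2, bit_index = 11;
          unsigned int line_id = bit_index + (slice << 5);

          /* decode, as tegra_hte_en_dis_common() does with HTE_SLICE_SIZE == 0x20 */
          unsigned int dec_slice = line_id >> 5;
          unsigned int dec_bit = line_id & 0x1f;

          /* 31.25 MHz -> one tick is exactly 32 ns, so ns = ticks << 5 */
          uint64_t tsc_ticks = 1000000;            /* arbitrary tick count */
          uint64_t ns = tsc_ticks << 5;            /* 32,000,000 ns = 32 ms */

          printf("line_id=%u (slice=%u bit=%u), %llu ticks = %llu ns\n",
                 line_id, dec_slice, dec_bit,
                 (unsigned long long)tsc_ticks, (unsigned long long)ns);
          return 0;
  }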