diff mbox series

[net-next] net: ibm: replenish rx pool and poll less frequently

Message ID 20210602170156.41643-1-lijunp213@gmail.com
State New
Headers show
Series [net-next] net: ibm: replenish rx pool and poll less frequently | expand

Commit Message

Lijun Pan June 2, 2021, 5:01 p.m. UTC
The old mechanism replenishes the rx pool even if only one frame is processed
in the poll function, which causes a lot of overhead. The old mechanism also
restarts polling until the number of processed frames reaches the budget,
which can cause the poll function to loop back to restart_poll up to 63 times
and to call replenish_rx_pool up to 63 times. This can easily cause a soft
lockup. So, don't replenish too often, and don't goto restart_poll on each
poll invocation. If there are pending descriptors, fetch them in the next
poll instance.

Signed-off-by: Lijun Pan <lijunp213@gmail.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

Comments

Rick Lindsley June 2, 2021, 5:47 p.m. UTC | #1
On 6/2/21 10:01 AM, Lijun Pan wrote:
> The old mechanism replenishes rx pool even only one frames is processed in
> the poll function, which causes lots of overheads. The old mechanism
> restarts polling until processed frames reaches the budget, which can
> cause the poll function to loop into restart_poll 63 times at most and to
> call replenish_rx_poll 63 times at most.

Presumably this frequency is to keep ahead of demand.  When you say lots
of overhead — can you attach a number to that?  How much does this change
improve that number, and what workload benefits?
Dany Madden June 2, 2021, 5:58 p.m. UTC | #2
On 2021-06-02 10:01, Lijun Pan wrote:
> The old mechanism replenishes rx pool even only one frames is processed 
> in
> the poll function, which causes lots of overheads. The old mechanism
> restarts polling until processed frames reaches the budget, which can
> cause the poll function to loop into restart_poll 63 times at most and 
> to
> call replenish_rx_poll 63 times at most. This will cause soft lockup 
> very
> easily. So, don't replenish too often, and don't goto restart_poll in 
> each
> poll function. If there are pending descriptors, fetch them in the next
> poll instance.

Does this improve performance?

> 
> Signed-off-by: Lijun Pan <lijunp213@gmail.com>
> ---
>  drivers/net/ethernet/ibm/ibmvnic.c | 15 +++------------
>  1 file changed, 3 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c
> b/drivers/net/ethernet/ibm/ibmvnic.c
> index ffb2a91750c7..fae1eaa39dd0 100644
> --- a/drivers/net/ethernet/ibm/ibmvnic.c
> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
> @@ -2435,7 +2435,6 @@ static int ibmvnic_poll(struct napi_struct
> *napi, int budget)
>  	frames_processed = 0;
>  	rx_scrq = adapter->rx_scrq[scrq_num];
> 
> -restart_poll:
>  	while (frames_processed < budget) {
>  		struct sk_buff *skb;
>  		struct ibmvnic_rx_buff *rx_buff;
> @@ -2512,20 +2511,12 @@ static int ibmvnic_poll(struct napi_struct
> *napi, int budget)
>  	}
> 
>  	if (adapter->state != VNIC_CLOSING &&
> -	    ((atomic_read(&adapter->rx_pool[scrq_num].available) <
> -	      adapter->req_rx_add_entries_per_subcrq / 2) ||
> -	      frames_processed < budget))

There is a budget that the driver should adhere to. Even for a single frame, 
it should still process that frame within the budget.

> +	    (atomic_read(&adapter->rx_pool[scrq_num].available) <
> +	      adapter->req_rx_add_entries_per_subcrq / 2))
>  		replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]);
>  	if (frames_processed < budget) {
> -		if (napi_complete_done(napi, frames_processed)) {
> +		if (napi_complete_done(napi, frames_processed))
>  			enable_scrq_irq(adapter, rx_scrq);
> -			if (pending_scrq(adapter, rx_scrq)) {
> -				if (napi_reschedule(napi)) {
> -					disable_scrq_irq(adapter, rx_scrq);
> -					goto restart_poll;
> -				}
> -			}
> -		}
>  	}
>  	return frames_processed;
>  }
Dany Madden June 2, 2021, 6:23 p.m. UTC | #3
On 2021-06-02 10:58, Dany Madden wrote:
> On 2021-06-02 10:01, Lijun Pan wrote:
>> The old mechanism replenishes rx pool even only one frames is 
>> processed in
>> the poll function, which causes lots of overheads. The old mechanism
>> restarts polling until processed frames reaches the budget, which can
>> cause the poll function to loop into restart_poll 63 times at most and 
>> to
>> call replenish_rx_poll 63 times at most. This will cause soft lockup 
>> very
>> easily. So, don't replenish too often, and don't goto restart_poll in 
>> each
>> poll function. If there are pending descriptors, fetch them in the 
>> next
>> poll instance.
> 
> Does this improve performance?
> 
>> 
>> Signed-off-by: Lijun Pan <lijunp213@gmail.com>
>> ---
>>  drivers/net/ethernet/ibm/ibmvnic.c | 15 +++------------
>>  1 file changed, 3 insertions(+), 12 deletions(-)
>> 
>> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c
>> b/drivers/net/ethernet/ibm/ibmvnic.c
>> index ffb2a91750c7..fae1eaa39dd0 100644
>> --- a/drivers/net/ethernet/ibm/ibmvnic.c
>> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
>> @@ -2435,7 +2435,6 @@ static int ibmvnic_poll(struct napi_struct
>> *napi, int budget)
>>  	frames_processed = 0;
>>  	rx_scrq = adapter->rx_scrq[scrq_num];
>> 
>> -restart_poll:
>>  	while (frames_processed < budget) {
>>  		struct sk_buff *skb;
>>  		struct ibmvnic_rx_buff *rx_buff;
>> @@ -2512,20 +2511,12 @@ static int ibmvnic_poll(struct napi_struct
>> *napi, int budget)
>>  	}
>> 
>>  	if (adapter->state != VNIC_CLOSING &&
>> -	    ((atomic_read(&adapter->rx_pool[scrq_num].available) <
>> -	      adapter->req_rx_add_entries_per_subcrq / 2) ||
>> -	      frames_processed < budget))
> 
> There is a budget that the driver should adhere to. Even one frame, it
> should still process the frame within a budget.
I meant it should still replenish the buffer, because the commit that added 
this check, 
https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/commit/?id=41ed0a00ffcd903ece4304a4a65d95706115ffcb, 
stated that a low frames_processed count means few incoming packets, so the 
driver should use that idle time to refill the buffers.

So, it would be good to see some numbers of how this change is doing in 
comparison to the code before.

> 
>> +	    (atomic_read(&adapter->rx_pool[scrq_num].available) <
>> +	      adapter->req_rx_add_entries_per_subcrq / 2))
>>  		replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]);
>>  	if (frames_processed < budget) {
>> -		if (napi_complete_done(napi, frames_processed)) {
>> +		if (napi_complete_done(napi, frames_processed))
>>  			enable_scrq_irq(adapter, rx_scrq);
>> -			if (pending_scrq(adapter, rx_scrq)) {
>> -				if (napi_reschedule(napi)) {
>> -					disable_scrq_irq(adapter, rx_scrq);
>> -					goto restart_poll;
>> -				}
>> -			}
>> -		}
>>  	}
>>  	return frames_processed;
>>  }
diff mbox series

Patch

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index ffb2a91750c7..fae1eaa39dd0 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2435,7 +2435,6 @@  static int ibmvnic_poll(struct napi_struct *napi, int budget)
 	frames_processed = 0;
 	rx_scrq = adapter->rx_scrq[scrq_num];
 
-restart_poll:
 	while (frames_processed < budget) {
 		struct sk_buff *skb;
 		struct ibmvnic_rx_buff *rx_buff;
@@ -2512,20 +2511,12 @@  static int ibmvnic_poll(struct napi_struct *napi, int budget)
 	}
 
 	if (adapter->state != VNIC_CLOSING &&
-	    ((atomic_read(&adapter->rx_pool[scrq_num].available) <
-	      adapter->req_rx_add_entries_per_subcrq / 2) ||
-	      frames_processed < budget))
+	    (atomic_read(&adapter->rx_pool[scrq_num].available) <
+	      adapter->req_rx_add_entries_per_subcrq / 2))
 		replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]);
 	if (frames_processed < budget) {
-		if (napi_complete_done(napi, frames_processed)) {
+		if (napi_complete_done(napi, frames_processed))
 			enable_scrq_irq(adapter, rx_scrq);
-			if (pending_scrq(adapter, rx_scrq)) {
-				if (napi_reschedule(napi)) {
-					disable_scrq_irq(adapter, rx_scrq);
-					goto restart_poll;
-				}
-			}
-		}
 	}
 	return frames_processed;
 }