@@ -89,11 +89,22 @@ static mempool_t *sg_sense_pool;
#define cuptr64(usp_val) ((const void __user *)(uintptr_t)(usp_val))
/* Following enum contains the states of sg_request::rq_st */
-enum sg_rq_state { /* N.B. sg_rq_state_arr assumes SG_RS_AWAIT_RCV==2 */
- SG_RS_INACTIVE = 0, /* request not in use (e.g. on fl) */
- SG_RS_INFLIGHT, /* active: cmd/req issued, no response yet */
- SG_RS_AWAIT_RCV, /* have response from LLD, awaiting receive */
- SG_RS_BUSY, /* temporary state should rarely be seen */
+enum sg_rq_state { /* N.B. sg_rq_state_arr assumes SG_RQ_AWAIT_RCV==2 */
+ SG_RQ_INACTIVE = 0, /* request not in use (e.g. on fl) */
+ SG_RQ_INFLIGHT, /* active: cmd/req issued, no response yet */
+ SG_RQ_AWAIT_RCV, /* have response from LLD, awaiting receive */
+ SG_RQ_BUSY, /* temporary state should rarely be seen */
+	SG_RQ_SHR_SWAP,		/* read-side: finished, awaits swap to write-side */
+ SG_RQ_SHR_IN_WS, /* read-side: waits while write-side inflight */
+};
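+
+/*
+ * Editorial sketch (not authoritative) of the usual request lifecycle, as
+ * implied by sg_setup_req(), sg_execute_cmd(), sg_rq_end_io() and the
+ * receive paths:
+ *
+ *   SG_RQ_INACTIVE --> SG_RQ_BUSY (setup) --> SG_RQ_INFLIGHT -->
+ *   SG_RQ_AWAIT_RCV --> SG_RQ_BUSY (receive) --> SG_RQ_INACTIVE
+ *
+ * A read-side shared request instead parks in SG_RQ_SHR_SWAP after it
+ * completes, moves to SG_RQ_SHR_IN_WS while the paired write-side request
+ * is inflight, and is returned to SG_RQ_INACTIVE when the write-side
+ * request finishes.
+ */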
+
+/* write-side sets up sharing: ioctl(ws_fd,SG_SET_GET_EXTENDED(SHARE_FD(rs_fd))) */
+enum sg_shr_var {
+ SG_SHR_NONE = 0, /* no sharing on this fd, so _not_ shared request */
+ SG_SHR_RS_NOT_SRQ, /* read-side fd but _not_ shared request */
+ SG_SHR_RS_RQ, /* read-side sharing on this request */
+ SG_SHR_WS_NOT_SRQ, /* write-side fd but _not_ shared request */
+ SG_SHR_WS_RQ, /* write-side sharing on this request */
};
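+
+/*
+ * Editorial sketch only (the sei_wr_mask field name is assumed, check the
+ * uapi header): from user space, with rs_fd and ws_fd open on the two
+ * devices, a share might be established along these lines:
+ *
+ *	struct sg_extended_info sei;
+ *
+ *	memset(&sei, 0, sizeof(sei));
+ *	sei.sei_wr_mask = SG_SEIM_SHARE_FD;
+ *	sei.share_fd = rs_fd;		// pair with this read-side fd
+ *	if (ioctl(ws_fd, SG_SET_GET_EXTENDED, &sei) < 0)
+ *		;	// handle error
+ */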
/* If sum_of(dlen) of a fd exceeds this, write() will yield E2BIG */
@@ -119,13 +130,13 @@ enum sg_rq_state { /* N.B. sg_rq_state_arr assumes SG_RS_AWAIT_RCV==2 */
#define SG_FRQ_IS_V4I 0 /* true (set) when is v4 interface */
#define SG_FRQ_IS_ORPHAN 1 /* owner of request gone */
#define SG_FRQ_SYNC_INVOC 2 /* synchronous (blocking) invocation */
-#define SG_FRQ_NO_US_XFER 3 /* no user space transfer of data */
+#define SG_FRQ_US_XFER 3 /* kernel<-->user_space data transfer */
#define SG_FRQ_ABORTING 4 /* in process of aborting this cmd */
-#define SG_FRQ_DEACT_ORPHAN 6 /* not keeping orphan so de-activate */
-#define SG_FRQ_RECEIVING 7 /* guard against multiple receivers */
-#define SG_FRQ_FOR_MMAP 8 /* request needs PAGE_SIZE elements */
-#define SG_FRQ_COUNT_ACTIVE 9 /* sfp->submitted + waiting active */
-#define SG_FRQ_ISSUED 10 /* blk_execute_rq_nowait() finished */
+#define SG_FRQ_DEACT_ORPHAN 5 /* not keeping orphan so de-activate */
+#define SG_FRQ_RECEIVING 6 /* guard against multiple receivers */
+#define SG_FRQ_FOR_MMAP 7 /* request needs PAGE_SIZE elements */
+#define SG_FRQ_COUNT_ACTIVE 8 /* sfp->submitted + waiting active */
+#define SG_FRQ_ISSUED 9 /* blk_execute_rq_nowait() finished */
/* Bit positions (flags) for sg_fd::ffd_bm bitmask follow */
#define SG_FFD_FORCE_PACKID 0 /* receive only given pack_id/tag */
@@ -134,10 +145,11 @@ enum sg_rq_state { /* N.B. sg_rq_state_arr assumes SG_RS_AWAIT_RCV==2 */
#define SG_FFD_HIPRI_SEEN 3 /* could have HIPRI requests active */
#define SG_FFD_TIME_IN_NS 4 /* set: time in nanoseconds, else ms */
#define SG_FFD_Q_AT_TAIL 5 /* set: queue reqs at tail of blk q */
-#define SG_FFD_PREFER_TAG 6 /* prefer tag over pack_id (def) */
-#define SG_FFD_RELEASE 7 /* release (close) underway */
-#define SG_FFD_NO_DURATION 8 /* don't do command duration calc */
-#define SG_FFD_MORE_ASYNC 9 /* yield EBUSY more often */
+#define SG_FFD_READ_SIDE_ERR 6 /* prior read-side of share failed */
+#define SG_FFD_PREFER_TAG 7 /* prefer tag over pack_id (def) */
+#define SG_FFD_RELEASE 8 /* release (close) underway */
+#define SG_FFD_NO_DURATION 9 /* don't do command duration calc */
+#define SG_FFD_MORE_ASYNC 10 /* yield EBUSY more often */
/* Bit positions (flags) for sg_device::fdev_bm bitmask follow */
#define SG_FDEV_EXCLUDE 0 /* have fd open with O_EXCL */
@@ -216,6 +228,7 @@ struct sg_fd;
struct sg_request { /* active SCSI command or inactive request */
struct sg_scatter_hold sgat_h; /* hold buffer, perhaps scatter list */
+ struct sg_scatter_hold *sgatp; /* ptr to prev unless write-side shr req */
union {
struct sg_slice_hdr3 s_hdr3; /* subset of sg_io_hdr */
struct sg_slice_hdr4 s_hdr4; /* reduced size struct sg_io_v4 */
@@ -229,6 +242,7 @@ struct sg_request { /* active SCSI command or inactive request */
int pack_id; /* v3 pack_id or in v4 request_extra field */
int sense_len; /* actual sense buffer length (data-in) */
atomic_t rq_st; /* request state, holds a enum sg_rq_state */
+ enum sg_shr_var sh_var; /* sharing variety, SG_SHR_NONE=0 if none */
u8 cmd_opcode; /* first byte of SCSI cdb */
int tag; /* block layer identifier of request */
blk_qc_t cookie; /* ids 1 or more queues for blk_poll() */
@@ -237,7 +251,7 @@ struct sg_request { /* active SCSI command or inactive request */
u8 *sense_bp; /* mempool alloc-ed sense buffer, as needed */
struct sg_fd *parentfp; /* pointer to owning fd, even when on fl */
struct request *rqq; /* released in sg_rq_end_io(), bio kept */
- struct bio *bio; /* kept until this req -->SG_RS_INACTIVE */
+ struct bio *bio; /* kept until this req -->SG_RQ_INACTIVE */
struct execute_work ew_orph; /* harvest orphan request */
};
@@ -262,6 +276,7 @@ struct sg_fd { /* holds the state of a file descriptor */
unsigned long ffd_bm[1]; /* see SG_FFD_* defines above */
struct file *filp; /* my identity when sharing */
struct sg_request *rsv_srp;/* one reserve request per fd */
+ struct sg_request *ws_srp; /* when rsv SG_SHR_RS_RQ, ptr to write-side */
struct sg_fd __rcu *share_sfp;/* fd share cross-references, else NULL */
struct fasync_struct *async_qp; /* used by asynchronous notification */
struct xarray srp_arr; /* xarray of sg_request object pointers */
@@ -317,10 +332,11 @@ static int sg_read_append(struct sg_request *srp, void __user *outp,
static void sg_remove_sgat(struct sg_request *srp);
static struct sg_fd *sg_add_sfp(struct sg_device *sdp, struct file *filp);
static void sg_remove_sfp(struct kref *);
+static void sg_remove_sfp_share(struct sg_fd *sfp, bool is_rd_side);
static struct sg_request *sg_find_srp_by_id(struct sg_fd *sfp, int id,
bool is_tag);
static struct sg_request *sg_setup_req(struct sg_comm_wr_t *cwrp,
- int dxfr_len);
+ enum sg_shr_var sh_var, int dxfr_len);
static void sg_deact_request(struct sg_fd *sfp, struct sg_request *srp);
static struct sg_device *sg_get_dev(int min_dev);
static void sg_device_destroy(struct kref *kref);
@@ -331,6 +347,7 @@ static int sg_srp_q_blk_poll(struct sg_request *srp, struct request_queue *q,
int loop_count);
#if IS_ENABLED(CONFIG_SCSI_LOGGING) && IS_ENABLED(SG_DEBUG)
static const char *sg_rq_st_str(enum sg_rq_state rq_st, bool long_str);
+static const char *sg_shr_str(enum sg_shr_var sh_var, bool long_str);
#endif
#define SG_WRITE_COUNT_LIMIT (32 * 1024 * 1024)
@@ -345,7 +362,9 @@ static const char *sg_rq_st_str(enum sg_rq_state rq_st, bool long_str);
#define SG_IS_DETACHING(sdp) test_bit(SG_FDEV_DETACHING, (sdp)->fdev_bm)
#define SG_HAVE_EXCLUDE(sdp) test_bit(SG_FDEV_EXCLUDE, (sdp)->fdev_bm)
-#define SG_RS_ACTIVE(srp) (atomic_read(&(srp)->rq_st) != SG_RS_INACTIVE)
+#define SG_IS_O_NONBLOCK(sfp) (!!((sfp)->filp->f_flags & O_NONBLOCK))
+#define SG_RQ_ACTIVE(srp) (atomic_read(&(srp)->rq_st) != SG_RQ_INACTIVE)
/*
* Kernel needs to be built with CONFIG_SCSI_LOGGING to see log messages.
@@ -427,7 +446,7 @@ sg_wait_open_event(struct sg_device *sdp, bool o_excl)
mutex_unlock(&sdp->open_rel_lock);
res = wait_event_interruptible
(sdp->open_wait,
- (SG_IS_DETACHING(sdp) ||
+ (unlikely(SG_IS_DETACHING(sdp)) ||
atomic_read(&sdp->open_cnt) == 0));
mutex_lock(&sdp->open_rel_lock);
@@ -441,7 +460,7 @@ sg_wait_open_event(struct sg_device *sdp, bool o_excl)
mutex_unlock(&sdp->open_rel_lock);
res = wait_event_interruptible
(sdp->open_wait,
- (SG_IS_DETACHING(sdp) ||
+ (unlikely(SG_IS_DETACHING(sdp)) ||
!SG_HAVE_EXCLUDE(sdp)));
mutex_lock(&sdp->open_rel_lock);
@@ -497,7 +516,7 @@ sg_open(struct inode *inode, struct file *filp)
nonseekable_open(inode, filp);
o_excl = !!(op_flags & O_EXCL);
non_block = !!(op_flags & O_NONBLOCK);
- if (o_excl && ((op_flags & O_ACCMODE) == O_RDONLY))
+ if (unlikely(o_excl) && ((op_flags & O_ACCMODE) == O_RDONLY))
return -EPERM;/* not permitted, need write access for O_EXCL */
sdp = sg_get_dev(min_dev); /* increments sdp->d_ref */
if (IS_ERR(sdp))
@@ -572,8 +591,15 @@ sg_open(struct inode *inode, struct file *filp)
goto sg_put;
}
+static inline bool
+sg_fd_is_shared(struct sg_fd *sfp)
+{
+ return !xa_get_mark(&sfp->parentdp->sfp_arr, sfp->idx,
+ SG_XA_FD_UNSHARED);
+}
+
static inline struct sg_fd *
-sg_fd_shared_ptr(struct sg_fd *sfp)
+sg_fd_share_ptr(struct sg_fd *sfp)
{
struct sg_fd *res_sfp;
struct sg_device *sdp = sfp->parentdp;
@@ -618,6 +644,10 @@ sg_release(struct inode *inode, struct file *filp)
SG_LOG(1, sfp, "%s: second release on this fd ? ?\n",
__func__);
scsi_autopm_put_device(sdp->device);
+ if (!xa_get_mark(&sdp->sfp_arr, sfp->idx, SG_XA_FD_FREE) &&
+ sg_fd_is_shared(sfp))
+ sg_remove_sfp_share(sfp, xa_get_mark(&sdp->sfp_arr, sfp->idx,
+ SG_XA_FD_RS_SHARE));
kref_put(&sfp->f_ref, sg_remove_sfp);
/*
@@ -826,7 +856,7 @@ sg_submit_v3(struct sg_fd *sfp, struct sg_io_hdr *hp, bool sync,
if (hp->flags & SG_FLAG_MMAP_IO) {
int res = sg_chk_mmap(sfp, hp->flags, hp->dxfer_len);
- if (res)
+ if (unlikely(res))
return res;
}
/* when v3 seen, allow cmd_q on this fd (def: no cmd_q) */
@@ -864,7 +894,7 @@ sg_submit_v4(struct sg_fd *sfp, void __user *p, struct sg_io_v4 *h4p,
else if (h4p->dout_xferp)
len = h4p->dout_xfer_len;
res = sg_chk_mmap(sfp, h4p->flags, len);
- if (res)
+ if (unlikely(res))
return res;
}
/* once v4 (or v3) seen, allow cmd_q on this fd (def: no cmd_q) */
@@ -902,7 +932,7 @@ sg_ctl_iosubmit(struct sg_fd *sfp, void __user *p)
struct sg_io_v4 *h4p = (struct sg_io_v4 *)hdr_store;
struct sg_device *sdp = sfp->parentdp;
- res = sg_allow_if_err_recovery(sdp, (sfp->filp->f_flags & O_NONBLOCK));
+ res = sg_allow_if_err_recovery(sdp, SG_IS_O_NONBLOCK(sfp));
if (res)
return res;
if (copy_from_user(hdr_store, p, SZ_SG_IO_V4))
@@ -920,7 +950,7 @@ sg_ctl_iosubmit_v3(struct sg_fd *sfp, void __user *p)
struct sg_io_hdr *h3p = (struct sg_io_hdr *)hdr_store;
struct sg_device *sdp = sfp->parentdp;
- res = sg_allow_if_err_recovery(sdp, (sfp->filp->f_flags & O_NONBLOCK));
+ res = sg_allow_if_err_recovery(sdp, SG_IS_O_NONBLOCK(sfp));
if (unlikely(res))
return res;
if (copy_from_user(h3p, p, SZ_SG_IO_HDR))
@@ -930,6 +960,54 @@ sg_ctl_iosubmit_v3(struct sg_fd *sfp, void __user *p)
return -EPERM;
}
+/*
+ * Assumes sharing has been established at the file descriptor level and now we
+ * check the rq_flags of a new request/command. SGV4_FLAG_NO_DXFER may or may
+ * not be used on the read-side; it must be used on the write-side. Also
+ * returns (via *sh_varp) the proposed sg_request::sh_var of the new request
+ * yet to be built/re-used.
+ */
+static int
+sg_share_chk_flags(struct sg_fd *sfp, u32 rq_flags, int dxfer_len, int dir,
+ enum sg_shr_var *sh_varp)
+{
+ bool is_read_side = xa_get_mark(&sfp->parentdp->sfp_arr, sfp->idx,
+ SG_XA_FD_RS_SHARE);
+ int result = 0;
+ enum sg_shr_var sh_var = SG_SHR_NONE;
+
+ if (rq_flags & SGV4_FLAG_SHARE) {
+ if (rq_flags & SG_FLAG_DIRECT_IO)
+ result = -EINVAL; /* since no control of data buffer */
+ else if (dxfer_len < 1)
+ result = -ENODATA;
+ else if (is_read_side) {
+ sh_var = SG_SHR_RS_RQ;
+ if (dir != SG_DXFER_FROM_DEV)
+ result = -ENOMSG;
+ if (rq_flags & SGV4_FLAG_NO_DXFER) {
+ /* rule out some contradictions */
+ if (rq_flags & SG_FL_MMAP_DIRECT)
+ result = -ENODATA;
+ }
+ } else { /* fd is write-side */
+ sh_var = SG_SHR_WS_RQ;
+ if (dir != SG_DXFER_TO_DEV)
+ result = -ENOMSG;
+ if (!(rq_flags & SGV4_FLAG_NO_DXFER))
+ result = -ENOMSG;
+ if (rq_flags & SG_FL_MMAP_DIRECT)
+ result = -ENODATA;
+ }
+ } else if (is_read_side) {
+ sh_var = SG_SHR_RS_NOT_SRQ;
+ } else {
+ sh_var = SG_SHR_WS_NOT_SRQ;
+ }
+ *sh_varp = sh_var;
+ return result;
+}
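+
+/*
+ * Illustrative only, not a normative API example: given the checks above, a
+ * write-side v4 request is a data-out transfer flagged roughly as
+ *
+ *	h4p->flags = SGV4_FLAG_SHARE | SGV4_FLAG_NO_DXFER;
+ *
+ * with the data to send taken from the read-side's reserve buffer, while
+ * the paired read-side request sets SGV4_FLAG_SHARE on a data-in
+ * (SG_DXFER_FROM_DEV) transfer, with or without SGV4_FLAG_NO_DXFER.
+ */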
+
#if IS_ENABLED(SG_LOG_ACTIVE)
static void
sg_rq_state_fail_msg(struct sg_fd *sfp, enum sg_rq_state exp_old_st,
@@ -949,38 +1027,6 @@ sg_rq_state_fail_msg(struct sg_fd *sfp, enum sg_rq_state exp_old_st,
#endif
/* Functions ending in '_ulck' assume sfp->xa_lock held by caller. */
-static void
-sg_rq_chg_state_force_ulck(struct sg_request *srp, enum sg_rq_state new_st)
-{
- bool prev, want;
- struct sg_fd *sfp = srp->parentfp;
- struct xarray *xafp = &sfp->srp_arr;
-
- atomic_set(&srp->rq_st, new_st);
- want = (new_st == SG_RS_AWAIT_RCV);
- prev = xa_get_mark(xafp, srp->rq_idx, SG_XA_RQ_AWAIT);
- if (prev != want) {
- if (want)
- __xa_set_mark(xafp, srp->rq_idx, SG_XA_RQ_AWAIT);
- else
- __xa_clear_mark(xafp, srp->rq_idx, SG_XA_RQ_AWAIT);
- }
- want = (new_st == SG_RS_INACTIVE);
- prev = xa_get_mark(xafp, srp->rq_idx, SG_XA_RQ_INACTIVE);
- if (prev != want) {
- if (want) {
- int prev_idx = READ_ONCE(sfp->low_used_idx);
-
- if (prev_idx < 0 || srp->rq_idx < prev_idx ||
- !xa_get_mark(xafp, prev_idx, SG_XA_RQ_INACTIVE))
- WRITE_ONCE(sfp->low_used_idx, srp->rq_idx);
- __xa_set_mark(xafp, srp->rq_idx, SG_XA_RQ_INACTIVE);
- } else {
- __xa_clear_mark(xafp, srp->rq_idx, SG_XA_RQ_INACTIVE);
- }
- }
-}
-
static void
sg_rq_chg_state_help(struct xarray *xafp, struct sg_request *srp, int indic)
{
@@ -996,21 +1042,42 @@ sg_rq_chg_state_help(struct xarray *xafp, struct sg_request *srp, int indic)
}
/* Following array indexed by enum sg_rq_state, 0 means no xa mark change */
-static const int sg_rq_state_arr[] = {1, 0, 4, 0};
-static const int sg_rq_state_mul2arr[] = {2, 0, 8, 0};
+static const int sg_rq_state_arr[] = {1, 0, 4, 0, 0, 0};
+static const int sg_rq_state_mul2arr[] = {2, 0, 8, 0, 0, 0};
/*
* This function keeps the srp->rq_st state and associated marks on the
- * owning xarray's element in sync. If force is true then new_st is stored
- * in srp->rq_st and xarray marks are set accordingly (and old_st is
- * ignored); and 0 is returned.
- * If force is false, then atomic_cmpxchg() is called. If the actual
- * srp->rq_st is not old_st, then -EPROTOTYPE is returned. If the actual
- * srp->rq_st is old_st then it is replaced by new_st and the xarray marks
- * are setup accordingly and 0 is returned. This assumes srp_arr xarray
- * spinlock is held.
+ * owning xarray's element in sync. An attempt is made to change state with
+ * a call to atomic_cmpxchg(). If the actual srp->rq_st is not old_st, then
+ * -EPROTOTYPE is returned. If the actual srp->rq_st is old_st then it is
+ * replaced by new_st, the xarray marks are set up accordingly and 0 is
+ * returned. This assumes the srp_arr xarray spinlock is held.
*/
static int
+sg_rq_chg_state_ulck(struct sg_request *srp, enum sg_rq_state old_st,
+ enum sg_rq_state new_st)
+{
+ enum sg_rq_state act_old_st;
+ int indic;
+
+ indic = sg_rq_state_arr[(int)old_st] +
+ sg_rq_state_mul2arr[(int)new_st];
+ act_old_st = (enum sg_rq_state)atomic_cmpxchg(&srp->rq_st, old_st,
+ new_st);
+ if (act_old_st != old_st) {
+#if IS_ENABLED(SG_LOG_ACTIVE)
+ SG_LOG(1, srp->parentfp, "%s: unexpected old state: %s\n",
+ __func__, sg_rq_st_str(act_old_st, false));
+#endif
+ return -EPROTOTYPE; /* only used for this error type */
+ }
+ if (indic)
+ sg_rq_chg_state_help(&srp->parentfp->srp_arr, srp, indic);
+ return 0;
+}
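+
+/*
+ * Minimal usage sketch (assumes the caller holds the srp_arr xa_lock):
+ * claim a completed request, bailing out if another thread won the race:
+ *
+ *	if (sg_rq_chg_state_ulck(srp, SG_RQ_AWAIT_RCV, SG_RQ_BUSY) == 0) {
+ *		// this thread now owns srp's receive processing
+ *	} else {
+ *		// another thread got there first (-EPROTOTYPE)
+ *	}
+ */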
+
+/* Similar to sg_rq_chg_state_ulck() but uses the xarray spinlock */
+static int
sg_rq_chg_state(struct sg_request *srp, enum sg_rq_state old_st,
enum sg_rq_state new_st)
{
@@ -1030,7 +1097,7 @@ sg_rq_chg_state(struct sg_request *srp, enum sg_rq_state old_st,
sg_rq_st_str(act_old_st, false));
return -EPROTOTYPE; /* only used for this error type */
}
- if (new_st == SG_RS_INACTIVE) {
+ if (new_st == SG_RQ_INACTIVE) {
int prev_idx = READ_ONCE(sfp->low_used_idx);
if (prev_idx < 0 || srp->rq_idx < prev_idx ||
@@ -1050,6 +1117,38 @@ sg_rq_chg_state(struct sg_request *srp, enum sg_rq_state old_st,
return 0;
}
+static void
+sg_rq_chg_state_force_ulck(struct sg_request *srp, enum sg_rq_state new_st)
+{
+ bool prev, want;
+ struct sg_fd *sfp = srp->parentfp;
+ struct xarray *xafp = &sfp->srp_arr;
+
+ atomic_set(&srp->rq_st, new_st);
+ want = (new_st == SG_RQ_AWAIT_RCV);
+ prev = xa_get_mark(xafp, srp->rq_idx, SG_XA_RQ_AWAIT);
+ if (prev != want) {
+ if (want)
+ __xa_set_mark(xafp, srp->rq_idx, SG_XA_RQ_AWAIT);
+ else
+ __xa_clear_mark(xafp, srp->rq_idx, SG_XA_RQ_AWAIT);
+ }
+ want = (new_st == SG_RQ_INACTIVE);
+ prev = xa_get_mark(xafp, srp->rq_idx, SG_XA_RQ_INACTIVE);
+ if (prev != want) {
+ if (want) {
+ int prev_idx = READ_ONCE(sfp->low_used_idx);
+
+ if (prev_idx < 0 || srp->rq_idx < prev_idx ||
+ !xa_get_mark(xafp, prev_idx, SG_XA_RQ_INACTIVE))
+ WRITE_ONCE(sfp->low_used_idx, srp->rq_idx);
+ __xa_set_mark(xafp, srp->rq_idx, SG_XA_RQ_INACTIVE);
+ } else {
+ __xa_clear_mark(xafp, srp->rq_idx, SG_XA_RQ_INACTIVE);
+ }
+ }
+}
+
static void
sg_rq_chg_state_force(struct sg_request *srp, enum sg_rq_state new_st)
{
@@ -1086,7 +1185,7 @@ sg_execute_cmd(struct sg_fd *sfp, struct sg_request *srp)
at_head = !(srp->rq_flags & SG_FLAG_Q_AT_TAIL);
kref_get(&sfp->f_ref); /* sg_rq_end_io() does kref_put(). */
- sg_rq_chg_state_force(srp, SG_RS_INFLIGHT);
+ sg_rq_chg_state_force(srp, SG_RQ_INFLIGHT);
/* >>>>>>> send cmd/req off to other levels <<<<<<<< */
if (!sync) {
@@ -1115,6 +1214,7 @@ sg_common_write(struct sg_comm_wr_t *cwrp)
int dxfr_len, dir;
int pack_id = SG_PACK_ID_WILDCARD;
u32 rq_flags;
+ enum sg_shr_var sh_var;
struct sg_fd *fp = cwrp->sfp;
struct sg_device *sdp = fp->parentdp;
struct sg_request *srp;
@@ -1145,10 +1245,19 @@ sg_common_write(struct sg_comm_wr_t *cwrp)
rq_flags = hi_p->flags;
pack_id = hi_p->pack_id;
}
+ if (sg_fd_is_shared(fp)) {
+ res = sg_share_chk_flags(fp, rq_flags, dxfr_len, dir, &sh_var);
+ if (unlikely(res < 0))
+ return ERR_PTR(res);
+ } else {
+ sh_var = SG_SHR_NONE;
+ if (rq_flags & SGV4_FLAG_SHARE)
+ return ERR_PTR(-ENOMSG);
+ }
if (dxfr_len >= SZ_256M)
return ERR_PTR(-EINVAL);
- srp = sg_setup_req(cwrp, dxfr_len);
+ srp = sg_setup_req(cwrp, sh_var, dxfr_len);
if (IS_ERR(srp))
return srp;
srp->rq_flags = rq_flags;
@@ -1235,8 +1344,6 @@ sg_copy_sense(struct sg_request *srp, bool v4_active)
sb_len_ret = min_t(int, sb_len_ret, sb_len);
if (copy_to_user(up, sbp, sb_len_ret))
sb_len_ret = -EFAULT;
- } else {
- sb_len_ret = 0;
}
mempool_free(sbp, sg_sense_pool);
}
@@ -1246,7 +1353,10 @@ sg_copy_sense(struct sg_request *srp, bool v4_active)
static int
sg_rec_state_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool v4_active)
{
+ int err = 0;
u32 rq_res = srp->rq_result;
+ enum sg_shr_var sh_var = srp->sh_var;
+ struct sg_fd *sh_sfp;
if (unlikely(srp->rq_result & 0xff)) {
int sb_len_wr = sg_copy_sense(srp, v4_active);
@@ -1256,9 +1366,86 @@ sg_rec_state_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool v4_active)
}
if (rq_res & SG_ML_RESULT_MSK)
srp->rq_info |= SG_INFO_CHECK;
+ if (test_bit(SG_FRQ_ABORTING, srp->frq_bm))
+ srp->rq_info |= SG_INFO_ABORTED;
+
+ sh_sfp = sg_fd_share_ptr(sfp);
+ if (sh_var == SG_SHR_WS_RQ && sg_fd_is_shared(sfp)) {
+ struct sg_request *rs_srp = sh_sfp->rsv_srp;
+		enum sg_rq_state rs_st = atomic_read(&rs_srp->rq_st);
+
+		switch (rs_st) {
+ case SG_RQ_SHR_SWAP:
+ case SG_RQ_SHR_IN_WS:
+ /* make read-side request available for re-use */
+ rs_srp->tag = SG_TAG_WILDCARD;
+ rs_srp->sh_var = SG_SHR_NONE;
+ sg_rq_chg_state_force(rs_srp, SG_RQ_INACTIVE);
+ atomic_inc(&sh_sfp->inactives);
+ break;
+ case SG_RQ_INACTIVE:
+ case SG_RQ_AWAIT_RCV:
+ sh_sfp->ws_srp = NULL;
+ break; /* nothing to do */
+ default:
+ err = -EPROTO; /* Logic error */
+ SG_LOG(1, sfp,
+ "%s: SHR_WS_RQ, bad read-side state: %s\n",
+			       __func__, sg_rq_st_str(rs_st, true));
+ break; /* nothing to do */
+ }
+ }
if (unlikely(SG_IS_DETACHING(sfp->parentdp)))
srp->rq_info |= SG_INFO_DEVICE_DETACHING;
- return 0;
+ return err;
+}
+
+static void
+sg_complete_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool other_err)
+{
+ enum sg_rq_state sr_st = atomic_read(&srp->rq_st);
+
+ /* advance state machine, send signal to write-side if appropriate */
+ switch (srp->sh_var) {
+ case SG_SHR_RS_RQ:
+ {
+ int poll_type = POLL_OUT;
+ struct sg_fd *sh_sfp = sg_fd_share_ptr(sfp);
+
+ if ((srp->rq_result & SG_ML_RESULT_MSK) || other_err) {
+ set_bit(SG_FFD_READ_SIDE_ERR, sfp->ffd_bm);
+ if (sr_st != SG_RQ_BUSY)
+ sg_rq_chg_state_force(srp, SG_RQ_BUSY);
+			poll_type = POLL_HUP; /* "Hang-UP" flag */
+ } else if (sr_st != SG_RQ_SHR_SWAP) {
+ sg_rq_chg_state_force(srp, SG_RQ_SHR_SWAP);
+ }
+ if (sh_sfp)
+ kill_fasync(&sh_sfp->async_qp, SIGPOLL,
+ poll_type);
+ }
+ break;
+ case SG_SHR_WS_RQ: /* cleanup both on write-side completion */
+ {
+ struct sg_fd *rs_sfp = sg_fd_share_ptr(sfp);
+
+ if (rs_sfp) {
+ rs_sfp->ws_srp = NULL;
+ if (rs_sfp->rsv_srp)
+ rs_sfp->rsv_srp->sh_var =
+ SG_SHR_RS_NOT_SRQ;
+ }
+ }
+ srp->sh_var = SG_SHR_WS_NOT_SRQ;
+ srp->sgatp = &srp->sgat_h;
+ if (sr_st != SG_RQ_BUSY)
+ sg_rq_chg_state_force(srp, SG_RQ_BUSY);
+ break;
+ default:
+ if (sr_st != SG_RQ_BUSY)
+ sg_rq_chg_state_force(srp, SG_RQ_BUSY);
+ break;
+ }
}
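+
+/*
+ * Editorial note: the kill_fasync() above only reaches a write-side process
+ * that registered for asynchronous notification on its fd, e.g. (sketch of
+ * standard fcntl(2) calls):
+ *
+ *	fcntl(ws_fd, F_SETOWN, getpid());
+ *	fcntl(ws_fd, F_SETSIG, SIGRTMIN);  // real siginfo with band code
+ *	fcntl(ws_fd, F_SETFL, fcntl(ws_fd, F_GETFL) | O_ASYNC);
+ *
+ * Its handler then sees POLL_OUT when the read-side buffer is ready to
+ * swap, or POLL_HUP when the read-side request failed.
+ */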
static int
@@ -1283,10 +1470,10 @@ sg_receive_v4(struct sg_fd *sfp, struct sg_request *srp, void __user *p,
h4p->duration = srp->duration;
switch (srp->s_hdr4.dir) {
case SG_DXFER_FROM_DEV:
- h4p->din_xfer_len = srp->sgat_h.dlen;
+ h4p->din_xfer_len = srp->sgatp->dlen;
break;
case SG_DXFER_TO_DEV:
- h4p->dout_xfer_len = srp->sgat_h.dlen;
+ h4p->dout_xfer_len = srp->sgatp->dlen;
break;
default:
break;
@@ -1302,6 +1489,7 @@ sg_receive_v4(struct sg_fd *sfp, struct sg_request *srp, void __user *p,
if (copy_to_user(p, h4p, SZ_SG_IO_V4))
err = err ? err : -EFAULT;
}
+ sg_complete_v3v4(sfp, srp, err < 0);
sg_finish_scsi_blk_rq(srp);
sg_deact_request(sfp, srp);
return err < 0 ? err : 0;
@@ -1317,7 +1505,7 @@ sg_receive_v4(struct sg_fd *sfp, struct sg_request *srp, void __user *p,
static int
sg_ctl_ioreceive(struct sg_fd *sfp, void __user *p)
{
- bool non_block = !!(sfp->filp->f_flags & O_NONBLOCK);
+ bool non_block = SG_IS_O_NONBLOCK(sfp);
bool use_tag = false;
int res, id;
int pack_id = SG_PACK_ID_WILDCARD;
@@ -1355,9 +1543,9 @@ sg_ctl_ioreceive(struct sg_fd *sfp, void __user *p)
return -ENODEV;
if (non_block)
return -EAGAIN;
- res = wait_event_interruptible(sfp->read_wait,
- sg_get_ready_srp(sfp, &srp,
- id, use_tag));
+ res = wait_event_interruptible
+ (sfp->read_wait,
+ sg_get_ready_srp(sfp, &srp, id, use_tag));
if (unlikely(SG_IS_DETACHING(sdp)))
return -ENODEV;
if (res)
@@ -1380,7 +1568,7 @@ sg_ctl_ioreceive(struct sg_fd *sfp, void __user *p)
static int
sg_ctl_ioreceive_v3(struct sg_fd *sfp, void __user *p)
{
- bool non_block = !!(sfp->filp->f_flags & O_NONBLOCK);
+ bool non_block = SG_IS_O_NONBLOCK(sfp);
int res;
int pack_id = SG_PACK_ID_WILDCARD;
u8 v3_holder[SZ_SG_IO_HDR];
@@ -1566,6 +1754,19 @@ sg_read(struct file *filp, char __user *p, size_t count, loff_t *ppos)
ret = get_user(want_id, &h3_up->pack_id);
if (ret)
return ret;
+ if (!non_block) {
+ int flgs;
+
+ ret = get_user(flgs, &h3_up->flags);
+ if (ret)
+ return ret;
+ if (flgs & SGV4_FLAG_IMMED)
+ non_block = true;
+ }
+ } else if (v3_hdr->interface_id == 'Q') {
+ pr_info_once("sg: %s: v4 interface%s here\n",
+ __func__, " disallowed");
+ return -EPERM;
} else {
return -EPERM;
}
@@ -1622,7 +1823,8 @@ sg_receive_v3(struct sg_fd *sfp, struct sg_request *srp, void __user *p)
struct sg_io_hdr hdr3;
struct sg_io_hdr *hp = &hdr3;
- SG_LOG(3, sfp, "%s: srp=0x%pK\n", __func__, srp);
+ SG_LOG(3, sfp, "%s: sh_var: %s srp=0x%pK\n", __func__,
+ sg_shr_str(srp->sh_var, false), srp);
err = sg_rec_state_v3v4(sfp, srp, false);
memset(hp, 0, sizeof(*hp));
memcpy(hp, &srp->s_hdr3, sizeof(srp->s_hdr3));
@@ -1687,92 +1889,192 @@ sg_calc_sgat_param(struct sg_device *sdp)
}
/*
- * Depending on which side is calling for the unshare, it is best to unshare
- * the other side first. For example: if the invocation is from the read-side
- * fd then rd_first should be false so the write-side is unshared first.
+ * Only valid for shared file descriptors, else -EINVAL. Should only be
+ * called after a read-side request has successfully completed so that
+ * there is valid data in the reserve buffer. If fini1_again0 is true then the
+ * read-side is taken out of the state waiting for a write-side request and
+ * put in the inactive state. If fini1_again0 is false (0) then
+ * the read-side (assuming it is inactive) is put in a state waiting for
+ * a write-side request. This function is called when the write mask is set on
+ * ioctl(SG_SET_GET_EXTENDED(SG_CTL_FLAGM_READ_SIDE_FINI)).
*/
+static int
+sg_change_after_read_side_rq(struct sg_fd *sfp, bool fini1_again0)
+{
+ int res = 0;
+ enum sg_rq_state sr_st;
+ unsigned long iflags;
+ struct sg_fd *rs_sfp;
+ struct sg_request *rs_rsv_srp = NULL;
+ struct sg_device *sdp = sfp->parentdp;
+
+ rs_sfp = sg_fd_share_ptr(sfp);
+ if (unlikely(!rs_sfp)) {
+ res = -EINVAL;
+ } else if (xa_get_mark(&sdp->sfp_arr, sfp->idx, SG_XA_FD_RS_SHARE)) {
+ rs_rsv_srp = sfp->rsv_srp;
+ rs_sfp = sfp;
+ } else { /* else called on write-side */
+ rs_rsv_srp = rs_sfp->rsv_srp;
+ }
+ if (res || !rs_rsv_srp)
+ goto fini;
+
+ xa_lock_irqsave(&rs_sfp->srp_arr, iflags);
+ sr_st = atomic_read(&rs_rsv_srp->rq_st);
+ if (fini1_again0) {
+ switch (sr_st) {
+ case SG_RQ_SHR_SWAP:
+			rs_rsv_srp->sh_var = SG_SHR_RS_NOT_SRQ;
+			res = sg_rq_chg_state_ulck(rs_rsv_srp, sr_st,
+						   SG_RQ_INACTIVE);
+ if (!res)
+ atomic_inc(&rs_sfp->inactives);
+ break;
+ case SG_RQ_SHR_IN_WS: /* too late, write-side rq active */
+ case SG_RQ_BUSY:
+ res = -EAGAIN;
+ break;
+		default:	/* read-side in other states is bad */
+ res = -EINVAL;
+ break;
+ }
+ } else {
+ switch (sr_st) {
+ case SG_RQ_INACTIVE:
+ rs_rsv_srp->sh_var = SG_SHR_RS_RQ;
+			res = sg_rq_chg_state_ulck(rs_rsv_srp, sr_st,
+						   SG_RQ_SHR_SWAP);
+ break;
+ case SG_RQ_SHR_SWAP:
+ break; /* already done, redundant call? */
+ default: /* all other states */
+ res = -EBUSY; /* read-side busy doing ... */
+ break;
+ }
+ }
+ xa_unlock_irqrestore(&rs_sfp->srp_arr, iflags);
+fini:
+ if (unlikely(res)) {
+ SG_LOG(1, sfp, "%s: err=%d\n", __func__, -res);
+ } else {
+ SG_LOG(6, sfp, "%s: okay, fini1_again0=%d\n", __func__,
+ fini1_again0);
+ }
+ return res;
+}
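+
+/*
+ * Illustrative sketch (struct sg_extended_info field names assumed): the
+ * write-side would typically drive the above via the boolean ctl flag:
+ *
+ *	struct sg_extended_info sei;
+ *
+ *	memset(&sei, 0, sizeof(sei));
+ *	sei.ctl_flags_wr_mask = SG_CTL_FLAGM_READ_SIDE_FINI;
+ *	sei.ctl_flags = SG_CTL_FLAGM_READ_SIDE_FINI;	// 1 --> finish
+ *	ioctl(ws_fd, SG_SET_GET_EXTENDED, &sei);
+ */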
+
static void
-sg_unshare_fds(struct sg_fd *rs_sfp, bool rs_lck, struct sg_fd *ws_sfp,
- bool ws_lck, bool rs_first)
+sg_unshare_rs_fd(struct sg_fd *rs_sfp, bool lck)
{
- bool diff_sdps = true;
unsigned long iflags = 0;
- struct sg_device *sdp;
- struct xarray *xap;
-
- if (rs_lck && ws_lck && rs_sfp && ws_sfp &&
- rs_sfp->parentdp == ws_sfp->parentdp)
- diff_sdps = false;
- if (!rs_first && ws_sfp)
- goto wr_first;
-rd_first:
- if (rs_sfp) {
- sdp = rs_sfp->parentdp;
- xap = &sdp->sfp_arr;
- rcu_assign_pointer(rs_sfp->share_sfp, NULL);
- if (rs_lck && (rs_first || diff_sdps))
- xa_lock_irqsave(xap, iflags);
- __xa_set_mark(xap, rs_sfp->idx, SG_XA_FD_UNSHARED);
- __xa_clear_mark(xap, rs_sfp->idx, SG_XA_FD_RS_SHARE);
- if (rs_lck && (!rs_first || diff_sdps))
- xa_unlock_irqrestore(xap, iflags);
- kref_put(&sdp->d_ref, sg_device_destroy);
- }
- if (!rs_first || !ws_sfp)
- return;
-wr_first:
- if (ws_sfp) {
- sdp = ws_sfp->parentdp;
- xap = &sdp->sfp_arr;
- rcu_assign_pointer(ws_sfp->share_sfp, NULL);
- if (ws_lck && (!rs_first || diff_sdps))
- xa_lock_irqsave(xap, iflags);
- __xa_set_mark(xap, ws_sfp->idx, SG_XA_FD_UNSHARED);
- /* SG_XA_FD_RS_SHARE mark should be already clear */
- if (ws_lck && (rs_first || diff_sdps))
- xa_unlock_irqrestore(xap, iflags);
- kref_put(&sdp->d_ref, sg_device_destroy);
- }
- if (!rs_first && rs_sfp)
- goto rd_first;
+ struct sg_device *sdp = rs_sfp->parentdp;
+ struct xarray *xadp = &sdp->sfp_arr;
+
+ rcu_assign_pointer(rs_sfp->share_sfp, NULL);
+ if (lck)
+ xa_lock_irqsave(xadp, iflags);
+ rs_sfp->ws_srp = NULL;
+ __xa_set_mark(xadp, rs_sfp->idx, SG_XA_FD_UNSHARED);
+ __xa_clear_mark(xadp, rs_sfp->idx, SG_XA_FD_RS_SHARE);
+ if (lck)
+ xa_unlock_irqrestore(xadp, iflags);
+ kref_put(&rs_sfp->f_ref, sg_remove_sfp);/* get: sg_find_sfp_helper() */
+}
+
+static void
+sg_unshare_ws_fd(struct sg_fd *ws_sfp, bool lck)
+{
+ unsigned long iflags;
+ struct sg_device *sdp = ws_sfp->parentdp;
+ struct xarray *xadp = &sdp->sfp_arr;
+
+ rcu_assign_pointer(ws_sfp->share_sfp, NULL);
+ if (lck)
+ xa_lock_irqsave(xadp, iflags);
+ __xa_set_mark(xadp, ws_sfp->idx, SG_XA_FD_UNSHARED);
+ /* SG_XA_FD_RS_SHARE mark should be already clear */
+ if (lck)
+ xa_unlock_irqrestore(xadp, iflags);
+ kref_put(&ws_sfp->f_ref, sg_remove_sfp);/* get: sg_find_sfp_helper() */
}
/*
- * Clean up loose ends that occur when clsong a file descriptor which is
+ * Clean up loose ends that occur when closing a file descriptor which is
* part of a file share. There may be request shares in various states using
- * this file share so care is needed.
+ * this file share so care is needed. A potential race when both sides of a
+ * fd share are closed (i.e. sg_release()) at around the same time is the
+ * reason for rechecking the FD_RS_SHARE or FD_UNSHARED marks.
*/
static void
sg_remove_sfp_share(struct sg_fd *sfp, bool is_rd_side)
{
+ __maybe_unused int res = 0;
unsigned long iflags;
- struct sg_fd *o_sfp = sg_fd_shared_ptr(sfp);
- struct sg_device *sdp;
- struct xarray *xap;
+ enum sg_rq_state sr_st;
+ struct sg_device *sdp = sfp->parentdp;
+ struct sg_device *sh_sdp;
+ struct sg_fd *sh_sfp;
+ struct sg_request *rsv_srp = NULL;
+ struct sg_request *ws_srp;
+ struct xarray *xadp = &sdp->sfp_arr;
- SG_LOG(3, sfp, "%s: sfp=0x%pK, o_sfp=0x%pK%s\n", __func__, sfp, o_sfp,
- (is_rd_side ? " read-side" : ""));
+ SG_LOG(3, sfp, "%s: sfp=%pK %s\n", __func__, sfp,
+ (is_rd_side ? "read-side" : "write-side"));
+ xa_lock_irqsave(xadp, iflags);
+ sh_sfp = sg_fd_share_ptr(sfp);
+ if (!sg_fd_is_shared(sfp))
+ goto err_out;
+ sh_sdp = sh_sfp->parentdp;
if (is_rd_side) {
- sdp = sfp->parentdp;
- xap = &sdp->sfp_arr;
- xa_lock_irqsave(xap, iflags);
- if (!xa_get_mark(xap, sfp->idx, SG_XA_FD_RS_SHARE)) {
- xa_unlock_irqrestore(xap, iflags);
+ bool set_inactive = false;
+
+ if (!xa_get_mark(xadp, sfp->idx, SG_XA_FD_RS_SHARE)) {
+ xa_unlock_irqrestore(xadp, iflags);
return;
}
- sg_unshare_fds(sfp, false, NULL, false, true);
- xa_unlock_irqrestore(&sdp->sfp_arr, iflags);
+ rsv_srp = sfp->rsv_srp;
+ if (!rsv_srp)
+ goto fini;
+ if (rsv_srp->sh_var != SG_SHR_RS_RQ)
+ goto fini;
+ sr_st = atomic_read(&rsv_srp->rq_st);
+ switch (sr_st) {
+ case SG_RQ_SHR_SWAP:
+ set_inactive = true;
+ break;
+ case SG_RQ_SHR_IN_WS:
+ ws_srp = sfp->ws_srp;
+ if (ws_srp && !IS_ERR(ws_srp)) {
+ ws_srp->sh_var = SG_SHR_WS_NOT_SRQ;
+ sfp->ws_srp = NULL;
+ }
+ set_inactive = true;
+ break;
+ default:
+ break;
+ }
+ rsv_srp->sh_var = SG_SHR_NONE;
+ if (set_inactive) {
+ res = sg_rq_chg_state_ulck(rsv_srp, sr_st, SG_RQ_INACTIVE);
+ if (!res)
+ atomic_inc(&sfp->inactives);
+ }
+fini:
+ if (!xa_get_mark(&sh_sdp->sfp_arr, sh_sfp->idx,
+ SG_XA_FD_FREE) && sg_fd_is_shared(sh_sfp))
+ sg_unshare_ws_fd(sh_sfp, sdp != sh_sdp);
+ sg_unshare_rs_fd(sfp, false);
} else {
- sdp = sfp->parentdp;
- xap = &sdp->sfp_arr;
- xa_lock_irqsave(xap, iflags);
- if (xa_get_mark(xap, sfp->idx, SG_XA_FD_UNSHARED)) {
- xa_unlock_irqrestore(xap, iflags);
+ if (!sg_fd_is_shared(sfp)) {
+ xa_unlock_irqrestore(xadp, iflags);
return;
- }
- sg_unshare_fds(NULL, false, sfp, false, false);
- xa_unlock_irqrestore(xap, iflags);
+ } else if (!xa_get_mark(&sh_sdp->sfp_arr, sh_sfp->idx,
+ SG_XA_FD_FREE))
+ sg_unshare_rs_fd(sh_sfp, sdp != sh_sdp);
+ sg_unshare_ws_fd(sfp, false);
}
+err_out:
+ xa_unlock_irqrestore(xadp, iflags);
}
/*
@@ -1782,41 +2084,45 @@ sg_remove_sfp_share(struct sg_fd *sfp, bool is_rd_side)
*/
static void
sg_do_unshare(struct sg_fd *sfp, bool unshare_val)
+ __must_hold(sfp->f_mutex)
{
bool retry;
int retry_count = 0;
- unsigned long iflags;
+ struct sg_request *rs_rsv_srp;
struct sg_fd *rs_sfp;
struct sg_fd *ws_sfp;
- struct sg_fd *o_sfp = sg_fd_shared_ptr(sfp);
+ struct sg_fd *o_sfp = sg_fd_share_ptr(sfp);
struct sg_device *sdp = sfp->parentdp;
if (xa_get_mark(&sdp->sfp_arr, sfp->idx, SG_XA_FD_UNSHARED)) {
SG_LOG(1, sfp, "%s: not shared ? ?\n", __func__);
- return; /* no share to undo */
+ return; /* no share to undo */
}
if (!unshare_val)
- return;
+ return; /* when unshare value is zero, it's a NOP */
again:
retry = false;
- xa_lock_irqsave(&sfp->srp_arr, iflags);
if (xa_get_mark(&sdp->sfp_arr, sfp->idx, SG_XA_FD_RS_SHARE)) {
rs_sfp = sfp;
ws_sfp = o_sfp;
- if (!xa_trylock(&ws_sfp->srp_arr)) {
- if (++retry_count > SG_ADD_RQ_MAX_RETRIES)
- SG_LOG(1, sfp, "%s: cannot get write-side lock\n",
- __func__);
- else
- retry = true;
- goto fini;
+ rs_rsv_srp = rs_sfp->rsv_srp;
+ if (rs_rsv_srp && rs_rsv_srp->sh_var != SG_SHR_RS_RQ) {
+ if (unlikely(!mutex_trylock(&ws_sfp->f_mutex))) {
+ if (++retry_count > SG_ADD_RQ_MAX_RETRIES)
+ SG_LOG(1, sfp,
+ "%s: cannot get write-side lock\n",
+ __func__);
+ else
+ retry = true;
+ goto fini;
+ }
+ sg_unshare_rs_fd(rs_sfp, true);
+ mutex_unlock(&ws_sfp->f_mutex);
}
- sg_unshare_fds(rs_sfp, false, ws_sfp, false, false);
- xa_unlock(&ws_sfp->srp_arr);
} else { /* called on write-side fd */
rs_sfp = o_sfp;
ws_sfp = sfp;
- if (!xa_trylock(&rs_sfp->srp_arr)) {
+ if (unlikely(!mutex_trylock(&rs_sfp->f_mutex))) {
if (++retry_count > SG_ADD_RQ_MAX_RETRIES)
SG_LOG(1, sfp, "%s: cannot get read side lock\n",
__func__);
@@ -1824,12 +2130,15 @@ sg_do_unshare(struct sg_fd *sfp, bool unshare_val)
retry = true;
goto fini;
}
- sg_unshare_fds(rs_sfp, false, ws_sfp, false, true);
- xa_unlock(&rs_sfp->srp_arr);
+ rs_rsv_srp = rs_sfp->rsv_srp;
+ if (rs_rsv_srp->sh_var != SG_SHR_RS_RQ) {
+ sg_unshare_rs_fd(rs_sfp, true);
+ sg_unshare_ws_fd(ws_sfp, true);
+ }
+ mutex_unlock(&rs_sfp->f_mutex);
}
fini:
- xa_unlock_irqrestore(&sfp->srp_arr, iflags);
- if (retry) {
+ if (unlikely(retry)) {
cpu_relax();
goto again;
}
@@ -1876,12 +2185,14 @@ sg_get_dur(struct sg_request *srp, const enum sg_rq_state *sr_stp,
u32 res = U32_MAX;
switch (sr_stp ? *sr_stp : atomic_read(&srp->rq_st)) {
- case SG_RS_INFLIGHT:
- case SG_RS_BUSY:
+ case SG_RQ_INFLIGHT:
+ case SG_RQ_BUSY:
res = sg_calc_rq_dur(srp, time_in_ns);
break;
- case SG_RS_AWAIT_RCV:
- case SG_RS_INACTIVE:
+ case SG_RQ_AWAIT_RCV:
+ case SG_RQ_SHR_SWAP:
+ case SG_RQ_SHR_IN_WS:
+ case SG_RQ_INACTIVE:
res = srp->duration;
is_dur = true; /* completion has occurred, timing finished */
break;
@@ -1917,7 +2228,7 @@ sg_fill_request_element(struct sg_fd *sfp, struct sg_request *srp,
static inline bool
sg_rq_landed(struct sg_device *sdp, struct sg_request *srp)
{
- return atomic_read_acquire(&srp->rq_st) != SG_RS_INFLIGHT ||
+ return atomic_read_acquire(&srp->rq_st) != SG_RQ_INFLIGHT ||
unlikely(SG_IS_DETACHING(sdp));
}
@@ -1933,7 +2244,7 @@ sg_wait_event_srp(struct sg_fd *sfp, void __user *p, struct sg_io_v4 *h4p,
enum sg_rq_state sr_st;
struct sg_device *sdp = sfp->parentdp;
- if (atomic_read(&srp->rq_st) != SG_RS_INFLIGHT)
+ if (atomic_read(&srp->rq_st) != SG_RQ_INFLIGHT)
goto skip_wait; /* and skip _acquire() */
if (srp->rq_flags & SGV4_FLAG_HIPRI) {
/* call blk_poll(), spinning till found */
@@ -1949,24 +2260,25 @@ sg_wait_event_srp(struct sg_fd *sfp, void __user *p, struct sg_io_v4 *h4p,
if (unlikely(res)) { /* -ERESTARTSYS because signal hit thread */
set_bit(SG_FRQ_IS_ORPHAN, srp->frq_bm);
/* orphans harvested when sfp->keep_orphan is false */
- atomic_set(&srp->rq_st, SG_RS_INFLIGHT);
- SG_LOG(1, sfp, "%s: wait_event_interruptible gave %d\n",
- __func__, res);
+ sg_rq_chg_state_force(srp, SG_RQ_INFLIGHT);
+ SG_LOG(1, sfp, "%s: wait_event_interruptible(): %s[%d]\n",
+ __func__, (res == -ERESTARTSYS ? "ERESTARTSYS" : ""),
+ res);
return res;
}
skip_wait:
if (unlikely(SG_IS_DETACHING(sdp))) {
- sg_rq_chg_state_force(srp, SG_RS_INACTIVE);
+ sg_rq_chg_state_force(srp, SG_RQ_INACTIVE);
atomic_inc(&sfp->inactives);
return -ENODEV;
}
sr_st = atomic_read(&srp->rq_st);
- if (unlikely(sr_st != SG_RS_AWAIT_RCV))
+ if (unlikely(sr_st != SG_RQ_AWAIT_RCV))
return -EPROTO; /* Logic error */
- res = sg_rq_chg_state(srp, sr_st, SG_RS_BUSY);
+ res = sg_rq_chg_state(srp, sr_st, SG_RQ_BUSY);
if (unlikely(res)) {
#if IS_ENABLED(SG_LOG_ACTIVE)
- sg_rq_state_fail_msg(sfp, sr_st, SG_RS_BUSY, __func__);
+ sg_rq_state_fail_msg(sfp, sr_st, SG_RQ_BUSY, __func__);
#endif
return res;
}
@@ -1991,8 +2303,7 @@ sg_ctl_sg_io(struct sg_device *sdp, struct sg_fd *sfp, void __user *p)
struct sg_io_v4 *h4p = (struct sg_io_v4 *)hu8arr;
SG_LOG(3, sfp, "%s: SG_IO%s\n", __func__,
- ((sfp->filp->f_flags & O_NONBLOCK) ? " O_NONBLOCK ignored" :
- ""));
+ (SG_IS_O_NONBLOCK(sfp) ? " O_NONBLOCK ignored" : ""));
res = sg_allow_if_err_recovery(sdp, false);
if (res)
return res;
@@ -2017,14 +2328,20 @@ sg_ctl_sg_io(struct sg_device *sdp, struct sg_fd *sfp, void __user *p)
if (!srp) /* mrq case: already processed all responses */
return res;
res = sg_wait_event_srp(sfp, p, h4p, srp);
- if (res)
- SG_LOG(1, sfp, "%s: %s=0x%pK state: %s\n", __func__,
- "unexpected srp", srp,
- sg_rq_st_str(atomic_read(&srp->rq_st), false));
+#if IS_ENABLED(SG_LOG_ACTIVE)
+ if (unlikely(res))
+ SG_LOG(1, sfp, "%s: %s=0x%pK state: %s, share: %s\n",
+ __func__, "unexpected srp", srp,
+ sg_rq_st_str(atomic_read(&srp->rq_st), false),
+ sg_shr_str(srp->sh_var, false));
+#endif
return res;
}
-/* When use_tag is true then id is a tag, else it is a pack_id. */
+/*
+ * When use_tag is true then id is a tag, else it is a pack_id. Returns
+ * valid srp if match, else returns NULL.
+ */
static struct sg_request *
sg_match_request(struct sg_fd *sfp, bool use_tag, int id)
{
@@ -2056,6 +2373,7 @@ sg_match_request(struct sg_fd *sfp, bool use_tag, int id)
static int
sg_ctl_abort(struct sg_device *sdp, struct sg_fd *sfp, void __user *p)
+ __must_hold(sfp->f_mutex)
{
bool use_tag;
int res, pack_id, tag, id;
@@ -2078,6 +2396,8 @@ sg_ctl_abort(struct sg_device *sdp, struct sg_fd *sfp, void __user *p)
srp = sg_match_request(sfp, use_tag, id);
if (!srp) { /* assume device (not just fd) scope */
xa_unlock_irqrestore(&sfp->srp_arr, iflags);
+ if (!(h4p->flags & SGV4_FLAG_DEV_SCOPE))
+ return -ENODATA;
xa_for_each(&sdp->sfp_arr, idx, o_sfp) {
if (o_sfp == sfp)
continue; /* already checked */
@@ -2095,18 +2415,20 @@ sg_ctl_abort(struct sg_device *sdp, struct sg_fd *sfp, void __user *p)
set_bit(SG_FRQ_ABORTING, srp->frq_bm);
res = 0;
switch (atomic_read(&srp->rq_st)) {
- case SG_RS_BUSY:
+ case SG_RQ_BUSY:
clear_bit(SG_FRQ_ABORTING, srp->frq_bm);
- res = -EBUSY; /* shouldn't occur often */
+ res = -EBUSY; /* should not occur often */
break;
- case SG_RS_INACTIVE: /* inactive on rq_list not good */
+ case SG_RQ_INACTIVE: /* inactive on rq_list not good */
clear_bit(SG_FRQ_ABORTING, srp->frq_bm);
res = -EPROTO;
break;
- case SG_RS_AWAIT_RCV: /* user should still do completion */
+ case SG_RQ_AWAIT_RCV: /* user should still do completion */
+ case SG_RQ_SHR_SWAP:
+ case SG_RQ_SHR_IN_WS:
clear_bit(SG_FRQ_ABORTING, srp->frq_bm);
break; /* nothing to do here, return 0 */
- case SG_RS_INFLIGHT: /* only attempt abort if inflight */
+ case SG_RQ_INFLIGHT: /* only attempt abort if inflight */
srp->rq_result |= (DRIVER_SOFT << 24);
{
struct request *rqq = READ_ONCE(srp->rqq);
@@ -2160,7 +2482,7 @@ sg_find_sfp_helper(struct sg_fd *from_sfp, struct sg_fd *pair_sfp,
xa_lock_irqsave(&from_sdp->sfp_arr, iflags);
rcu_assign_pointer(from_sfp->share_sfp, pair_sfp);
__xa_clear_mark(&from_sdp->sfp_arr, from_sfp->idx, SG_XA_FD_UNSHARED);
- kref_get(&from_sdp->d_ref); /* treat share like pseudo open() */
+ kref_get(&from_sfp->f_ref); /* so unshare done before release */
if (from_rd_side)
__xa_set_mark(&from_sdp->sfp_arr, from_sfp->idx,
SG_XA_FD_RS_SHARE);
@@ -2176,7 +2498,7 @@ sg_find_sfp_helper(struct sg_fd *from_sfp, struct sg_fd *pair_sfp,
if (!from_rd_side)
__xa_set_mark(&pair_sdp->sfp_arr, pair_sfp->idx,
SG_XA_FD_RS_SHARE);
- kref_get(&pair_sdp->d_ref); /* keep symmetry */
+ kref_get(&pair_sfp->f_ref); /* keep symmetry */
xa_unlock_irqrestore(&pair_sdp->sfp_arr, iflags);
return 0;
}
@@ -2336,7 +2658,7 @@ sg_fd_reshare(struct sg_fd *rs_sfp, int new_ws_fd)
int res = 0;
int retry_count = 0;
struct file *filp;
- struct sg_fd *ws_sfp = sg_fd_shared_ptr(rs_sfp);
+ struct sg_fd *ws_sfp = sg_fd_share_ptr(rs_sfp);
SG_LOG(3, ws_sfp, "%s: new_write_side_fd: %d\n", __func__, new_ws_fd);
if (unlikely(!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)))
@@ -2357,7 +2679,7 @@ sg_fd_reshare(struct sg_fd *rs_sfp, int new_ws_fd)
}
SG_LOG(6, ws_sfp, "%s: write-side fd ok, scan for filp=0x%pK\n", __func__,
filp);
- sg_unshare_fds(NULL, false, ws_sfp, false, false);
+ sg_unshare_ws_fd(ws_sfp, false);
again:
ws_sfp = sg_find_sfp_by_fd(filp, new_ws_fd, rs_sfp, true);
if (IS_ERR(ws_sfp)) {
@@ -2386,7 +2708,7 @@ sg_fd_reshare(struct sg_fd *rs_sfp, int new_ws_fd)
* First normalize want_rsv_sz to be >= sfp->sgat_elem_sz and
* <= max_segment_size. Exit if that is the same as old size; otherwise
* create a new candidate request of the new size. Then decide whether to
- * re-use an existing free list request (least buflen >= required size) or
+ * re-use an existing inactive request (least buflen >= required size) or
* use the new candidate. If new one used, leave old one but it is no longer
* the reserved request. Returns 0 on success, else a negated errno value.
*/
@@ -2404,12 +2726,15 @@ sg_set_reserved_sz(struct sg_fd *sfp, int want_rsv_sz)
struct sg_device *sdp = sfp->parentdp;
struct xarray *xafp = &sfp->srp_arr;
+ if (unlikely(!xa_get_mark(&sfp->parentdp->sfp_arr, sfp->idx,
+ SG_XA_FD_UNSHARED)))
+ return -EBUSY; /* this fd can't be either side of share */
o_srp = sfp->rsv_srp;
if (!o_srp)
return -EPROTO;
new_sz = min_t(int, want_rsv_sz, sdp->max_sgat_sz);
new_sz = max_t(int, new_sz, sfp->sgat_elem_sz);
- blen = o_srp->sgat_h.buflen;
+ blen = o_srp->sgatp->buflen;
SG_LOG(3, sfp, "%s: was=%d, ask=%d, new=%d (sgat_elem_sz=%d)\n",
__func__, blen, want_rsv_sz, new_sz, sfp->sgat_elem_sz);
if (blen == new_sz)
@@ -2424,15 +2749,14 @@ sg_set_reserved_sz(struct sg_fd *sfp, int want_rsv_sz)
res = -EPROTO;
goto fini;
}
- if (SG_RS_ACTIVE(o_srp) || sfp->mmap_sz > 0) {
+ if (SG_RQ_ACTIVE(o_srp) || sfp->mmap_sz > 0) {
res = -EBUSY;
goto fini;
}
use_new_srp = true;
xa_for_each(xafp, idx, t_srp) {
- if (t_srp != o_srp && new_sz <= t_srp->sgat_h.buflen &&
- !SG_RS_ACTIVE(t_srp)) {
- /* good candidate on free list, use */
+ if (t_srp != o_srp && new_sz <= t_srp->sgatp->buflen &&
+ !SG_RQ_ACTIVE(t_srp)) {
use_new_srp = false;
sfp->rsv_srp = t_srp;
break;
@@ -2447,7 +2771,7 @@ sg_set_reserved_sz(struct sg_fd *sfp, int want_rsv_sz)
cxc_srp = __xa_cmpxchg(xafp, idx, o_srp, n_srp, GFP_ATOMIC);
if (o_srp == cxc_srp) {
sfp->rsv_srp = n_srp;
- sg_rq_chg_state_force_ulck(n_srp, SG_RS_INACTIVE);
+ sg_rq_chg_state_force_ulck(n_srp, SG_RQ_INACTIVE);
/* don't bump inactives, since replaced an inactive */
xa_unlock_irqrestore(xafp, iflags);
SG_LOG(6, sfp, "%s: new rsv srp=0x%pK ++\n", __func__,
@@ -2496,6 +2820,27 @@ static int put_compat_request_table(struct compat_sg_req_info __user *o,
}
#endif
+static bool
+sg_any_persistent_orphans(struct sg_fd *sfp)
+{
+ if (test_bit(SG_FFD_KEEP_ORPHAN, sfp->ffd_bm)) {
+ int num_waiting = atomic_read(&sfp->waiting);
+ unsigned long idx;
+ struct sg_request *srp;
+ struct xarray *xafp = &sfp->srp_arr;
+
+ if (num_waiting < 1)
+ return false;
+ xa_for_each_marked(xafp, idx, srp, SG_XA_RQ_AWAIT) {
+ if (unlikely(!srp))
+ continue;
+ if (test_bit(SG_FRQ_IS_ORPHAN, srp->frq_bm))
+ return true;
+ }
+ }
+ return false;
+}
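+
+/*
+ * Illustrative sketch (the sei_rd_mask field name is assumed): user space
+ * can poll the above via the read mask:
+ *
+ *	struct sg_extended_info sei;
+ *
+ *	memset(&sei, 0, sizeof(sei));
+ *	sei.sei_rd_mask = SG_SEIM_CTL_FLAGS;
+ *	sei.ctl_flags_rd_mask = SG_CTL_FLAGM_ORPHANS;
+ *	if (ioctl(fd, SG_SET_GET_EXTENDED, &sei) == 0 &&
+ *	    (sei.ctl_flags & SG_CTL_FLAGM_ORPHANS))
+ *		;	// at least one orphaned request awaits on this fd
+ */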
+
/*
* Processing of ioctl(SG_SET_GET_EXTENDED(SG_SEIM_CTL_FLAGS)) which is a set
* of boolean flags. Access abbreviations: [rw], read-write; [ro], read-only;
@@ -2509,6 +2854,7 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
const u32 c_flgs_rm = seip->ctl_flags_rd_mask;
const u32 c_flgs_val_in = seip->ctl_flags;
u32 c_flgs_val_out = c_flgs_val_in;
+ struct sg_fd *rs_sfp;
struct sg_device *sdp = sfp->parentdp;
/* TIME_IN_NS boolean, [raw] time in nanoseconds (def: millisecs) */
@@ -2531,6 +2877,13 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
else
c_flgs_val_out &= ~SG_CTL_FLAGM_TAG_FOR_PACK_ID;
}
+ /* ORPHANS boolean, [ro] does this fd have any orphan requests? */
+ if (c_flgs_rm & SG_CTL_FLAGM_ORPHANS) {
+ if (sg_any_persistent_orphans(sfp))
+ c_flgs_val_out |= SG_CTL_FLAGM_ORPHANS;
+ else
+ c_flgs_val_out &= ~SG_CTL_FLAGM_ORPHANS;
+ }
/* OTHER_OPENS boolean, [ro] any other sg open fds on this dev? */
if (c_flgs_rm & SG_CTL_FLAGM_OTHER_OPENS) {
if (atomic_read(&sdp->open_cnt) > 1)
@@ -2554,10 +2907,58 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
* a shared commands is inflight, waits a little while for it
* to finish.
*/
- if (c_flgs_wm & SG_CTL_FLAGM_UNSHARE)
+ if (c_flgs_wm & SG_CTL_FLAGM_UNSHARE) {
+ mutex_lock(&sfp->f_mutex);
sg_do_unshare(sfp, !!(c_flgs_val_in & SG_CTL_FLAGM_UNSHARE));
+ mutex_unlock(&sfp->f_mutex);
+ }
if (c_flgs_rm & SG_CTL_FLAGM_UNSHARE)
- c_flgs_val_out &= ~SG_CTL_FLAGM_UNSHARE; /* clear bit */
+ c_flgs_val_out &= ~SG_CTL_FLAGM_UNSHARE; /* clear bit */
+ /* IS_SHARE boolean: [ro] true if fd may be read-side or write-side share */
+ if (c_flgs_rm & SG_CTL_FLAGM_IS_SHARE) {
+ if (xa_get_mark(&sdp->sfp_arr, sfp->idx, SG_XA_FD_UNSHARED))
+ c_flgs_val_out &= ~SG_CTL_FLAGM_IS_SHARE;
+ else
+ c_flgs_val_out |= SG_CTL_FLAGM_IS_SHARE;
+ }
+ /* IS_READ_SIDE boolean: [ro] true if this fd may be a read-side share */
+ if (c_flgs_rm & SG_CTL_FLAGM_IS_READ_SIDE) {
+ if (xa_get_mark(&sdp->sfp_arr, sfp->idx, SG_XA_FD_RS_SHARE))
+ c_flgs_val_out |= SG_CTL_FLAGM_IS_READ_SIDE;
+ else
+ c_flgs_val_out &= ~SG_CTL_FLAGM_IS_READ_SIDE;
+ }
+ /*
+ * READ_SIDE_FINI boolean, [rbw] should be called by write-side; when
+ * reading: read-side is finished, awaiting action by write-side;
+ * when written: 1 --> write-side doesn't want to continue
+ */
+ if (c_flgs_rm & SG_CTL_FLAGM_READ_SIDE_FINI) {
+ rs_sfp = sg_fd_share_ptr(sfp);
+ if (rs_sfp && rs_sfp->rsv_srp) {
+ struct sg_request *res_srp = rs_sfp->rsv_srp;
+
+ if (atomic_read(&res_srp->rq_st) == SG_RQ_SHR_SWAP)
+ c_flgs_val_out |= SG_CTL_FLAGM_READ_SIDE_FINI;
+ else
+ c_flgs_val_out &= ~SG_CTL_FLAGM_READ_SIDE_FINI;
+ } else {
+ c_flgs_val_out &= ~SG_CTL_FLAGM_READ_SIDE_FINI;
+ }
+ }
+ if (c_flgs_wm & SG_CTL_FLAGM_READ_SIDE_FINI) {
+ bool rs_fini_wm = !!(c_flgs_val_in & SG_CTL_FLAGM_READ_SIDE_FINI);
+
+ sg_change_after_read_side_rq(sfp, rs_fini_wm);
+ }
+ /* READ_SIDE_ERR boolean, [ro] share: read-side finished with error */
+ if (c_flgs_rm & SG_CTL_FLAGM_READ_SIDE_ERR) {
+ rs_sfp = sg_fd_share_ptr(sfp);
+ if (rs_sfp && test_bit(SG_FFD_READ_SIDE_ERR, rs_sfp->ffd_bm))
+ c_flgs_val_out |= SG_CTL_FLAGM_READ_SIDE_ERR;
+ else
+ c_flgs_val_out &= ~SG_CTL_FLAGM_READ_SIDE_ERR;
+ }
/* NO_DURATION boolean, [rbw] */
if (c_flgs_rm & SG_CTL_FLAGM_NO_DURATION)
flg = test_bit(SG_FFD_NO_DURATION, sfp->ffd_bm);
@@ -2700,7 +3101,7 @@ sg_ctl_extended(struct sg_fd *sfp, void __user *p)
}
/* if share then yield device number of (other) read-side */
if (s_rd_mask & SG_SEIM_SHARE_FD) {
- struct sg_fd *sh_sfp = sg_fd_shared_ptr(sfp);
+ struct sg_fd *sh_sfp = sg_fd_share_ptr(sfp);
seip->share_fd = sh_sfp ? sh_sfp->parentdp->index :
U32_MAX;
@@ -2717,7 +3118,7 @@ sg_ctl_extended(struct sg_fd *sfp, void __user *p)
}
/* if share then yield device number of (other) write-side */
if (s_rd_mask & SG_SEIM_CHG_SHARE_FD) {
- struct sg_fd *sh_sfp = sg_fd_shared_ptr(sfp);
+ struct sg_fd *sh_sfp = sg_fd_share_ptr(sfp);
seip->share_fd = sh_sfp ? sh_sfp->parentdp->index :
U32_MAX;
@@ -2766,7 +3167,7 @@ sg_ctl_extended(struct sg_fd *sfp, void __user *p)
}
if (s_rd_mask & SG_SEIM_RESERVED_SIZE)
seip->reserved_sz = (u32)min_t(int,
- sfp->rsv_srp->sgat_h.buflen,
+ sfp->rsv_srp->sgatp->buflen,
sdp->max_sgat_sz);
/* copy to user space if int or boolean read mask non-zero */
if (s_rd_mask || seip->ctl_flags_rd_mask) {
@@ -2863,27 +3264,37 @@ sg_ioctl_common(struct file *filp, struct sg_device *sdp, struct sg_fd *sfp,
SG_LOG(6, sfp, "%s: cmd=0x%x, O_NONBLOCK=%d\n", __func__, cmd_in,
!!(filp->f_flags & O_NONBLOCK));
- if (unlikely(SG_IS_DETACHING(sdp)))
- return -ENODEV;
sdev = sdp->device;
switch (cmd_in) {
case SG_IO:
+ if (unlikely(SG_IS_DETACHING(sdp)))
+ return -ENODEV;
return sg_ctl_sg_io(sdp, sfp, p);
case SG_IOSUBMIT:
SG_LOG(3, sfp, "%s: SG_IOSUBMIT\n", __func__);
+ if (unlikely(SG_IS_DETACHING(sdp)))
+ return -ENODEV;
return sg_ctl_iosubmit(sfp, p);
case SG_IOSUBMIT_V3:
SG_LOG(3, sfp, "%s: SG_IOSUBMIT_V3\n", __func__);
+ if (unlikely(SG_IS_DETACHING(sdp)))
+ return -ENODEV;
return sg_ctl_iosubmit_v3(sfp, p);
case SG_IORECEIVE:
SG_LOG(3, sfp, "%s: SG_IORECEIVE\n", __func__);
+ if (unlikely(SG_IS_DETACHING(sdp)))
+ return -ENODEV;
return sg_ctl_ioreceive(sfp, p);
case SG_IORECEIVE_V3:
SG_LOG(3, sfp, "%s: SG_IORECEIVE_V3\n", __func__);
+ if (unlikely(SG_IS_DETACHING(sdp)))
+ return -ENODEV;
return sg_ctl_ioreceive_v3(sfp, p);
case SG_IOABORT:
SG_LOG(3, sfp, "%s: SG_IOABORT\n", __func__);
+ if (unlikely(SG_IS_DETACHING(sdp)))
+ return -ENODEV;
if (read_only)
return -EPERM;
mutex_lock(&sfp->f_mutex);
@@ -2949,7 +3360,7 @@ sg_ioctl_common(struct file *filp, struct sg_device *sdp, struct sg_fd *sfp,
return res;
case SG_GET_RESERVED_SIZE:
mutex_lock(&sfp->f_mutex);
- val = min_t(int, sfp->rsv_srp->sgat_h.buflen,
+ val = min_t(int, sfp->rsv_srp->sgatp->buflen,
sdp->max_sgat_sz);
mutex_unlock(&sfp->f_mutex);
SG_LOG(3, sfp, "%s: SG_GET_RESERVED_SIZE=%d\n",
@@ -3149,11 +3560,11 @@ sg_srp_q_blk_poll(struct sg_request *srp, struct request_queue *q, int loop_coun
num = (loop_count < 1) ? 1 : loop_count;
for (k = 0; k < num; ++k) {
- if (atomic_read(&srp->rq_st) != SG_RS_INFLIGHT)
+ if (atomic_read(&srp->rq_st) != SG_RQ_INFLIGHT)
return -ENODATA;
n = blk_poll(q, srp->cookie, loop_count < 0 /* spin if negative */);
if (n > 0)
- return atomic_read(&srp->rq_st) == SG_RS_AWAIT_RCV;
+ return atomic_read(&srp->rq_st) == SG_RQ_AWAIT_RCV;
if (n < 0)
return n;
}
@@ -3183,7 +3594,7 @@ sg_sfp_blk_poll(struct sg_fd *sfp, int loop_count)
xa_for_each(xafp, idx, srp) {
if ((srp->rq_flags & SGV4_FLAG_HIPRI) &&
!test_bit(SG_FRQ_SYNC_INVOC, srp->frq_bm) &&
- atomic_read(&srp->rq_st) == SG_RS_INFLIGHT &&
+ atomic_read(&srp->rq_st) == SG_RQ_INFLIGHT &&
test_bit(SG_FRQ_ISSUED, srp->frq_bm)) {
xa_unlock_irqrestore(xafp, iflags);
n = sg_srp_q_blk_poll(srp, q, loop_count);
@@ -3299,7 +3710,7 @@ sg_vma_fault(struct vm_fault *vmf)
goto out_err;
}
mutex_lock(&sfp->f_mutex);
- rsv_schp = &srp->sgat_h;
+ rsv_schp = srp->sgatp;
offset = vmf->pgoff << PAGE_SHIFT;
if (offset >= (unsigned int)rsv_schp->buflen) {
SG_LOG(1, sfp, "%s: offset[%lu] >= rsv.buflen\n", __func__,
@@ -3357,7 +3768,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
}
/* Check reserve request is inactive and has large enough buffer */
srp = sfp->rsv_srp;
- if (SG_RS_ACTIVE(srp)) {
+ if (SG_RQ_ACTIVE(srp)) {
res = -EBUSY;
goto fini;
}
@@ -3425,7 +3836,7 @@ sg_rq_end_io_usercontext(struct work_struct *work)
static void
sg_rq_end_io(struct request *rqq, blk_status_t status)
{
- enum sg_rq_state rqq_state = SG_RS_AWAIT_RCV;
+ enum sg_rq_state rqq_state = SG_RQ_AWAIT_RCV;
int a_resid, slen;
u32 rq_result;
unsigned long iflags;
@@ -3452,18 +3863,18 @@ sg_rq_end_io(struct request *rqq, blk_status_t status)
srp->in_resid = a_resid;
}
}
+ if (test_bit(SG_FRQ_ABORTING, srp->frq_bm) && rq_result == 0)
+ srp->rq_result |= (DRIVER_HARD << 24);
- SG_LOG(6, sfp, "%s: pack_id=%d, res=0x%x\n", __func__, srp->pack_id,
- srp->rq_result);
+ SG_LOG(6, sfp, "%s: pack_id=%d, tag=%d, res=0x%x\n", __func__,
+ srp->pack_id, srp->tag, srp->rq_result);
if (srp->start_ns > 0) /* zero only when SG_FFD_NO_DURATION is set */
srp->duration = sg_calc_rq_dur(srp, test_bit(SG_FFD_TIME_IN_NS,
sfp->ffd_bm));
if (unlikely((rq_result & SG_ML_RESULT_MSK) && slen > 0 &&
test_bit(SG_FDEV_LOG_SENSE, sdp->fdev_bm))) {
- u32 scsi_stat = rq_result & 0xff;
-
- if (scsi_stat == SAM_STAT_CHECK_CONDITION ||
- scsi_stat == SAM_STAT_COMMAND_TERMINATED)
+ if ((rq_result & 0xff) == SAM_STAT_CHECK_CONDITION ||
+ (rq_result & 0xff) == SAM_STAT_COMMAND_TERMINATED)
__scsi_print_sense(sdp->device, __func__, scsi_rp->sense, slen);
}
if (slen > 0) {
@@ -3491,10 +3902,10 @@ sg_rq_end_io(struct request *rqq, blk_status_t status)
srp->sense_len = slen;
if (unlikely(test_bit(SG_FRQ_IS_ORPHAN, srp->frq_bm))) {
if (test_bit(SG_FFD_KEEP_ORPHAN, sfp->ffd_bm)) {
- clear_bit(SG_FRQ_SYNC_INVOC, srp->frq_bm);
+ __clear_bit(SG_FRQ_SYNC_INVOC, srp->frq_bm);
} else {
- rqq_state = SG_RS_BUSY;
- set_bit(SG_FRQ_DEACT_ORPHAN, srp->frq_bm);
+ rqq_state = SG_RQ_BUSY;
+ __set_bit(SG_FRQ_DEACT_ORPHAN, srp->frq_bm);
}
}
xa_lock_irqsave(&sfp->srp_arr, iflags);
@@ -3522,7 +3933,7 @@ sg_rq_end_io(struct request *rqq, blk_status_t status)
scsi_req_free_cmd(scsi_rp);
blk_put_request(rqq);
- if (likely(rqq_state == SG_RS_AWAIT_RCV)) {
+ if (likely(rqq_state == SG_RQ_AWAIT_RCV)) {
/* Wake any sg_read()/ioctl(SG_IORECEIVE) awaiting this req */
if (!(srp->rq_flags & SGV4_FLAG_HIPRI))
wake_up_interruptible(&sfp->read_wait);
@@ -3649,7 +4060,7 @@ sg_add_device(struct device *cl_dev, struct class_interface *cl_intf)
goto cdev_add_err;
sdp->cdev = cdev;
- if (sg_sysfs_valid) {
+ if (likely(sg_sysfs_valid)) {
struct device *sg_class_member;
sg_class_member = device_create(sg_sysfs_class, cl_dev->parent,
@@ -3663,7 +4074,7 @@ sg_add_device(struct device *cl_dev, struct class_interface *cl_intf)
}
error = sysfs_create_link(&scsidp->sdev_gendev.kobj,
&sg_class_member->kobj, "generic");
- if (error)
+ if (unlikely(error))
pr_err("%s: unable to make symlink 'generic' back "
"to sg%d\n", __func__, sdp->index);
} else
@@ -3674,7 +4085,6 @@ sg_add_device(struct device *cl_dev, struct class_interface *cl_intf)
"type %d\n", sdp->index, scsidp->type);
dev_set_drvdata(cl_dev, sdp);
-
return 0;
cdev_add_err:
@@ -3694,7 +4104,7 @@ static void
sg_device_destroy(struct kref *kref)
{
struct sg_device *sdp = container_of(kref, struct sg_device, d_ref);
- unsigned long flags;
+ unsigned long iflags;
SCSI_LOG_TIMEOUT(1, pr_info("[tid=%d] %s: sdp idx=%d, sdp=0x%pK --\n",
(current ? current->pid : -1), __func__,
@@ -3706,9 +4116,9 @@ sg_device_destroy(struct kref *kref)
*/
xa_destroy(&sdp->sfp_arr);
- write_lock_irqsave(&sg_index_lock, flags);
+ write_lock_irqsave(&sg_index_lock, iflags);
idr_remove(&sg_index_idr, sdp->index);
- write_unlock_irqrestore(&sg_index_lock, flags);
+ write_unlock_irqrestore(&sg_index_lock, iflags);
put_disk(sdp->disk);
kfree(sdp);
@@ -3962,7 +4372,7 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
kfree(long_cmdp);
return PTR_ERR(rqq);
}
- /* current sg_request protected by SG_RS_BUSY state */
+ /* current sg_request protected by SG_RQ_BUSY state */
scsi_rp = scsi_req(rqq);
WRITE_ONCE(srp->rqq, rqq);
if (rq_flags & SGV4_FLAG_YIELD_TAG)
@@ -3981,15 +4391,15 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
scsi_rp->cmd_len = cwrp->cmd_len;
srp->cmd_opcode = scsi_rp->cmd[0];
us_xfer = !(rq_flags & (SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO));
- assign_bit(SG_FRQ_NO_US_XFER, srp->frq_bm, !us_xfer);
+ assign_bit(SG_FRQ_US_XFER, srp->frq_bm, us_xfer);
reserved = (sfp->rsv_srp == srp);
rqq->end_io_data = srp;
scsi_rp->retries = SG_DEFAULT_RETRIES;
- req_schp = &srp->sgat_h;
+ req_schp = srp->sgatp;
if (dxfer_len <= 0 || dxfer_dir == SG_DXFER_NONE) {
SG_LOG(4, sfp, "%s: no data xfer [0x%pK]\n", __func__, srp);
- set_bit(SG_FRQ_NO_US_XFER, srp->frq_bm);
+ clear_bit(SG_FRQ_US_XFER, srp->frq_bm);
goto fini; /* path of reqs with no din nor dout */
} else if ((rq_flags & SG_FLAG_DIRECT_IO) && iov_count == 0 &&
!sdp->device->host->unchecked_isa_dma &&
@@ -4057,8 +4467,8 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
} else {
srp->bio = rqq->bio;
}
- SG_LOG((res ? 1 : 4), sfp, "%s: %s res=%d [0x%pK]\n", __func__, cp,
- res, srp);
+ SG_LOG((res ? 1 : 4), sfp, "%s: %s %s res=%d [0x%pK]\n", __func__,
+ sg_shr_str(srp->sh_var, false), cp, res, srp);
return res;
}
@@ -4092,7 +4502,7 @@ sg_finish_scsi_blk_rq(struct sg_request *srp)
blk_put_request(rqq);
}
if (srp->bio) {
- bool us_xfer = !test_bit(SG_FRQ_NO_US_XFER, srp->frq_bm);
+ bool us_xfer = test_bit(SG_FRQ_US_XFER, srp->frq_bm);
struct bio *bio = srp->bio;
srp->bio = NULL;
@@ -4118,7 +4528,7 @@ sg_mk_sgat(struct sg_request *srp, struct sg_fd *sfp, int minlen)
gfp_t mask_ap = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN | __GFP_ZERO;
gfp_t mask_kz = GFP_ATOMIC | __GFP_NOWARN;
struct sg_device *sdp = sfp->parentdp;
- struct sg_scatter_hold *schp = &srp->sgat_h;
+ struct sg_scatter_hold *schp = srp->sgatp;
struct page **pgp;
if (unlikely(minlen <= 0)) {
@@ -4234,7 +4644,7 @@ sg_read_append(struct sg_request *srp, void __user *outp, int num_xfer)
{
int k, num, res;
struct page *pgp;
- struct sg_scatter_hold *schp = &srp->sgat_h;
+ struct sg_scatter_hold *schp = srp->sgatp;
SG_LOG(4, srp->parentfp, "%s: num_xfer=%d\n", __func__, num_xfer);
if (unlikely(!outp || num_xfer <= 0))
@@ -4271,13 +4681,13 @@ sg_read_append(struct sg_request *srp, void __user *outp, int num_xfer)
* SG_PACK_ID_WILDCARD and SG_TAG_WILDCARD are -1 and that case is typically
* the fast path. This function is only used in the non-blocking cases.
* Returns pointer to (first) matching sg_request or NULL. If found,
- * sg_request state is moved from SG_RS_AWAIT_RCV to SG_RS_BUSY.
+ * sg_request state is moved from SG_RQ_AWAIT_RCV to SG_RQ_BUSY.
*/
static struct sg_request *
sg_find_srp_by_id(struct sg_fd *sfp, int id, bool is_tag)
{
__maybe_unused bool is_bad_st = false;
- __maybe_unused enum sg_rq_state bad_sr_st = SG_RS_INACTIVE;
+ __maybe_unused enum sg_rq_state bad_sr_st = SG_RQ_INACTIVE;
bool search_for_1 = (id != SG_TAG_WILDCARD);
bool second = false;
enum sg_rq_state sr_st;
@@ -4315,8 +4725,8 @@ sg_find_srp_by_id(struct sg_fd *sfp, int id, bool is_tag)
}
sr_st = atomic_read(&srp->rq_st);
switch (sr_st) {
- case SG_RS_AWAIT_RCV:
- res = sg_rq_chg_state(srp, sr_st, SG_RS_BUSY);
+ case SG_RQ_AWAIT_RCV:
+ res = sg_rq_chg_state(srp, sr_st, SG_RQ_BUSY);
if (likely(res == 0))
goto good;
/* else another caller got it, move on */
@@ -4325,7 +4735,9 @@ sg_find_srp_by_id(struct sg_fd *sfp, int id, bool is_tag)
bad_sr_st = atomic_read(&srp->rq_st);
}
break;
- case SG_RS_INFLIGHT:
+ case SG_RQ_SHR_IN_WS:
+ goto good;
+ case SG_RQ_INFLIGHT:
break;
default:
if (IS_ENABLED(CONFIG_SCSI_PROC_FS)) {
@@ -4358,13 +4770,13 @@ sg_find_srp_by_id(struct sg_fd *sfp, int id, bool is_tag)
srp = xa_find_after(xafp, &idx, end_idx, SG_XA_RQ_AWAIT)) {
if (test_bit(SG_FRQ_SYNC_INVOC, srp->frq_bm))
continue;
- res = sg_rq_chg_state(srp, SG_RS_AWAIT_RCV, SG_RS_BUSY);
+ res = sg_rq_chg_state(srp, SG_RQ_AWAIT_RCV, SG_RQ_BUSY);
if (likely(res == 0)) {
WRITE_ONCE(sfp->low_await_idx, idx + 1);
goto good;
}
#if IS_ENABLED(SG_LOG_ACTIVE)
- sg_rq_state_fail_msg(sfp, SG_RS_AWAIT_RCV, SG_RS_BUSY, __func__);
+ sg_rq_state_fail_msg(sfp, SG_RQ_AWAIT_RCV, SG_RQ_BUSY, __func__);
#endif
}
if (!srp && !second && s_idx > 0) {
@@ -4414,9 +4826,11 @@ sg_mk_srp(struct sg_fd *sfp, bool first)
else
srp = kzalloc(sizeof(*srp), gfp | GFP_ATOMIC);
if (srp) {
- atomic_set(&srp->rq_st, SG_RS_BUSY);
+ atomic_set(&srp->rq_st, SG_RQ_BUSY);
+ srp->sh_var = SG_SHR_NONE;
srp->parentfp = sfp;
srp->tag = SG_TAG_WILDCARD;
+ srp->sgatp = &srp->sgat_h; /* only write-side share changes sgatp */
return srp;
} else {
return ERR_PTR(-ENOMEM);
@@ -4445,7 +4859,7 @@ sg_mk_srp_sgat(struct sg_fd *sfp, bool first, int db_len)
* Irrespective of the given reserve request size, the minimum size requested
* will be PAGE_SIZE (often 4096 bytes). Returns a pointer to reserve object or
* a negated errno value twisted by ERR_PTR() macro. The actual number of bytes
- * allocated (maybe less than buflen) is in srp->sgat_h.buflen . Note that this
+ * allocated (maybe less than buflen) is in srp->sgatp->buflen . Note that this
* function is only called in contexts where locking is not required.
*/
static struct sg_request *
@@ -4482,26 +4896,125 @@ sg_build_reserve(struct sg_fd *sfp, int buflen)
/*
* Setup an active request (soon to carry a SCSI command) to the current file
* descriptor by creating a new one or re-using a request from the free
- * list (fl). If successful returns a valid pointer in SG_RS_BUSY state. On
+ * list (fl). If successful returns a valid pointer in SG_RQ_BUSY state. On
* failure returns a negated errno value twisted by ERR_PTR() macro.
*/
static struct sg_request *
-sg_setup_req(struct sg_comm_wr_t *cwrp, int dxfr_len)
+sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var, int dxfr_len)
{
bool act_empty = false;
- bool found = false;
+ bool allow_rsv = true;
bool mk_new_srp = true;
+ bool ws_rq = false;
bool try_harder = false;
bool second = false;
bool has_inactive = false;
- int l_used_idx;
+ int res, l_used_idx;
u32 sum_dlen;
unsigned long idx, s_idx, end_idx, iflags;
+ enum sg_rq_state sr_st;
+ enum sg_rq_state rs_sr_st = SG_RQ_INACTIVE;
struct sg_fd *fp = cwrp->sfp;
struct sg_request *r_srp = NULL; /* request to return */
struct sg_request *low_srp = NULL;
+ __maybe_unused struct sg_request *rsv_srp;
+ struct sg_request *rs_rsv_srp = NULL;
+ struct sg_fd *rs_sfp = NULL;
struct xarray *xafp = &fp->srp_arr;
__maybe_unused const char *cp;
+ char b[48];
+
+ b[0] = '\0';
+ rsv_srp = fp->rsv_srp;
+
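+ /* the share variety of this request decides how the reserve request is used */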
+ switch (sh_var) {
+ case SG_SHR_NONE:
+ case SG_SHR_WS_NOT_SRQ:
+ break;
+ case SG_SHR_RS_RQ:
+ sr_st = atomic_read(&rsv_srp->rq_st);
+ if (sr_st == SG_RQ_INACTIVE) {
+ res = sg_rq_chg_state(rsv_srp, sr_st, SG_RQ_BUSY);
+ if (likely(res == 0)) {
+ r_srp = rsv_srp;
+ mk_new_srp = false;
+ cp = "rs_rq";
+ goto good_fini;
+ }
+ }
+ r_srp = ERR_PTR(-EBUSY);
+ break;
+ case SG_SHR_RS_NOT_SRQ:
+ allow_rsv = false;
+ break;
+ case SG_SHR_WS_RQ:
+ rs_sfp = sg_fd_share_ptr(fp);
+ if (!sg_fd_is_shared(fp)) {
+ r_srp = ERR_PTR(-EPROTO);
+ break;
+ }
+ /*
+ * Contention here may be with another potential write-side trying
+ * to pair with this read-side. The loser will receive an
+ * EADDRINUSE errno. The winner advances read-side's rq_state:
+ * SG_RQ_SHR_SWAP --> SG_RQ_SHR_IN_WS
+ */
+ rs_rsv_srp = rs_sfp->rsv_srp;
+ rs_sr_st = atomic_read(&rs_rsv_srp->rq_st);
+ switch (rs_sr_st) {
+ case SG_RQ_AWAIT_RCV:
+ if (rs_rsv_srp->rq_result & SG_ML_RESULT_MSK) {
+ r_srp = ERR_PTR(-ENOSTR);
+ break;
+ }
+ fallthrough;
+ case SG_RQ_SHR_SWAP:
+ ws_rq = true;
+ if (rs_sr_st == SG_RQ_AWAIT_RCV)
+ break;
+ res = sg_rq_chg_state(rs_rsv_srp, rs_sr_st, SG_RQ_SHR_IN_WS);
+ if (unlikely(res))
+ r_srp = ERR_PTR(-EADDRINUSE);
+ break;
+ case SG_RQ_INFLIGHT:
+ case SG_RQ_BUSY:
+ r_srp = ERR_PTR(-EBUSY);
+ break;
+ case SG_RQ_INACTIVE:
+ r_srp = ERR_PTR(-EADDRNOTAVAIL);
+ break;
+ case SG_RQ_SHR_IN_WS:
+ default:
+ r_srp = ERR_PTR(-EADDRINUSE);
+ break;
+ }
+ break;
+ }
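+ /* on error from above: -EBUSY exits quietly, other errors log the cause */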
+ if (IS_ERR(r_srp)) {
+ if (PTR_ERR(r_srp) == -EBUSY)
+ goto err_out2;
+ if (sh_var == SG_SHR_RS_RQ)
+ snprintf(b, sizeof(b), "SG_SHR_RS_RQ --> sr_st=%s",
+ sg_rq_st_str(sr_st, false));
+ else if (sh_var == SG_SHR_WS_RQ && rs_sfp)
+ snprintf(b, sizeof(b), "SG_SHR_WS_RQ-->rs_sr_st=%s",
+ sg_rq_st_str(rs_sr_st, false));
+ else
+ snprintf(b, sizeof(b), "sh_var=%s",
+ sg_shr_str(sh_var, false));
+ goto err_out;
+ }
+ cp = "";
+
+ if (ws_rq) { /* write-side dlen may be smaller than read-side's dlen */
+ if (dxfr_len > rs_rsv_srp->sgatp->dlen) {
+ SG_LOG(4, fp, "%s: write-side dlen [%d] > read-side dlen\n",
+ __func__, dxfr_len);
+ r_srp = ERR_PTR(-E2BIG);
+ goto err_out;
+ }
+ dxfr_len = 0; /* any srp for write-side will do, pick smallest */
+ }
start_again:
cp = "";
@@ -4516,8 +5029,8 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, int dxfr_len)
if (l_used_idx >= 0 && xa_get_mark(xafp, s_idx, SG_XA_RQ_INACTIVE)) {
r_srp = xa_load(xafp, s_idx);
if (r_srp && r_srp->sgat_h.buflen <= SG_DEF_SECTOR_SZ) {
- if (sg_rq_chg_state(r_srp, SG_RS_INACTIVE, SG_RS_BUSY) == 0) {
- found = true;
+ if (sg_rq_chg_state(r_srp, SG_RQ_INACTIVE, SG_RQ_BUSY) == 0) {
+ mk_new_srp = false;
atomic_dec(&fp->inactives);
goto have_existing;
}
@@ -4525,6 +5038,8 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, int dxfr_len)
}
xa_for_each_marked(xafp, idx, r_srp, SG_XA_RQ_INACTIVE) {
has_inactive = true;
+ if (!allow_rsv && rsv_srp == r_srp)
+ continue;
if (!low_srp && dxfr_len < SG_DEF_SECTOR_SZ) {
low_srp = r_srp;
break;
@@ -4533,11 +5048,11 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, int dxfr_len)
/* If dxfr_len is small, use lowest inactive request */
if (low_srp) {
r_srp = low_srp;
- if (sg_rq_chg_state(r_srp, SG_RS_INACTIVE, SG_RS_BUSY))
+ if (sg_rq_chg_state(r_srp, SG_RQ_INACTIVE, SG_RQ_BUSY))
goto start_again; /* gone to another thread */
atomic_dec(&fp->inactives);
- cp = "toward end of srp_arr";
- found = true;
+ cp = "lowest inactive in srp_arr";
+ mk_new_srp = false;
}
} else {
l_used_idx = READ_ONCE(fp->low_used_idx);
@@ -4548,13 +5063,15 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, int dxfr_len)
for (r_srp = xa_find(xafp, &idx, end_idx, SG_XA_RQ_INACTIVE);
r_srp;
r_srp = xa_find_after(xafp, &idx, end_idx, SG_XA_RQ_INACTIVE)) {
+ if (!allow_rsv && rsv_srp == r_srp)
+ continue;
if (r_srp->sgat_h.buflen >= dxfr_len) {
- if (sg_rq_chg_state(r_srp, SG_RS_INACTIVE, SG_RS_BUSY))
+ if (sg_rq_chg_state(r_srp, SG_RQ_INACTIVE, SG_RQ_BUSY))
continue;
atomic_dec(&fp->inactives);
WRITE_ONCE(fp->low_used_idx, idx + 1);
cp = "near front of srp_arr";
- found = true;
+ mk_new_srp = false;
break;
}
}
@@ -4568,15 +5085,14 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, int dxfr_len)
}
}
have_existing:
- if (found) {
+ if (!mk_new_srp) {
r_srp->in_resid = 0;
r_srp->rq_info = 0;
r_srp->sense_len = 0;
- mk_new_srp = false;
- } else {
- mk_new_srp = true;
}
- if (mk_new_srp) {
+
+good_fini:
+ if (mk_new_srp) { /* Need new sg_request object */
bool allow_cmd_q = test_bit(SG_FFD_CMD_Q, fp->ffd_bm);
int res;
u32 n_idx;
@@ -4608,51 +5124,74 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, int dxfr_len)
res = __xa_alloc(xafp, &n_idx, r_srp, xa_limit_32b, GFP_KERNEL);
xa_unlock_irqrestore(xafp, iflags);
if (res < 0) {
- SG_LOG(1, fp, "%s: xa_alloc() failed, errno=%d\n",
- __func__, -res);
sg_remove_sgat(r_srp);
kfree(r_srp);
r_srp = ERR_PTR(-EPROTOTYPE);
+ SG_LOG(1, fp, "%s: xa_alloc() failed, errno=%d\n",
+ __func__, -res);
goto fini;
}
idx = n_idx;
r_srp->rq_idx = idx;
r_srp->parentfp = fp;
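+ /* force BUSY state; also updates the new entry's xarray marks */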
+ sg_rq_chg_state_force(r_srp, SG_RQ_BUSY);
SG_LOG(4, fp, "%s: mk_new_srp=0x%pK ++\n", __func__, r_srp);
}
+ /* copies only the first word of frq_bm; all request flags fit there */
WRITE_ONCE(r_srp->frq_bm[0], cwrp->frq_bm[0]); /* assumes <= 32 req flags */
- r_srp->sgat_h.dlen = dxfr_len;/* must be <= r_srp->sgat_h.buflen */
+ r_srp->sgatp->dlen = dxfr_len;/* must be <= r_srp->sgat_h.buflen */
+ r_srp->sh_var = sh_var;
r_srp->cmd_opcode = 0xff; /* set invalid opcode (VS), 0x0 is TUR */
fini:
/* If setup stalls (e.g. blk_get_request()) debug shows 'elap=1 ns' */
if (test_bit(SG_FFD_TIME_IN_NS, fp->ffd_bm))
r_srp->start_ns = S64_MAX;
- if (IS_ERR(r_srp))
- SG_LOG(1, fp, "%s: err=%ld\n", __func__, PTR_ERR(r_srp));
+ if (ws_rq && rs_rsv_srp) {
+ rs_sfp->ws_srp = r_srp;
+ /* write-side "shares" the read-side reserve request's data buffer */
+ r_srp->sgatp = &rs_rsv_srp->sgat_h;
+ } else if (sh_var == SG_SHR_RS_RQ && test_bit(SG_FFD_READ_SIDE_ERR, fp->ffd_bm))
+ clear_bit(SG_FFD_READ_SIDE_ERR, fp->ffd_bm);
+err_out:
+ if (IS_ERR(r_srp) && b[0])
+ SG_LOG(1, fp, "%s: bad %s\n", __func__, b);
if (!IS_ERR(r_srp))
SG_LOG(4, fp, "%s: %s %sr_srp=0x%pK\n", __func__, cp,
((r_srp == fp->rsv_srp) ? "[rsv] " : ""), r_srp);
+err_out2:
return r_srp;
}
/*
- * Moves a completed sg_request object to the free list and sets it to
- * SG_RS_INACTIVE which makes it available for re-use. Requests with no data
- * associated are appended to the tail of the free list while other requests
- * are prepended to the head of the free list.
+ * Sets srp to SG_RQ_INACTIVE unless it was in SG_RQ_SHR_SWAP state. Also
+ * changes the associated xarray entry flags to be consistent with
+ * SG_RQ_INACTIVE. Since this function can be called from many contexts,
+ * assume no xa locks are held.
+ * The state machine should ensure that two threads never race here.
*/
static void
sg_deact_request(struct sg_fd *sfp, struct sg_request *srp)
{
+ enum sg_rq_state sr_st;
u8 *sbp;
if (WARN_ON(!sfp || !srp))
return;
sbp = srp->sense_bp;
srp->sense_bp = NULL;
- WRITE_ONCE(srp->frq_bm[0], 0);
- sg_rq_chg_state_force(srp, SG_RS_INACTIVE);
- atomic_inc(&sfp->inactives);
+ sr_st = atomic_read(&srp->rq_st);
+ if (sr_st != SG_RQ_SHR_SWAP) { /* mark _BUSY then _INACTIVE at end */
+ /*
+ * Can be called from many contexts and it is hard to know
+ * whether xa locks held. So assume not.
+ */
+ sg_rq_chg_state_force(srp, SG_RQ_INACTIVE);
+ atomic_inc(&sfp->inactives);
+ WRITE_ONCE(srp->frq_bm[0], 0);
+ srp->tag = SG_TAG_WILDCARD;
+ srp->in_resid = 0;
+ srp->rq_info = 0;
+ }
/* maybe orphaned req, thus never read */
if (sbp)
mempool_free(sbp, sg_sense_pool);
@@ -4722,14 +5261,20 @@ sg_add_sfp(struct sg_device *sdp, struct file *filp)
kfree(sfp);
return ERR_PTR(err);
}
- if (srp->sgat_h.buflen < rbuf_len) {
+ if (srp->sgatp->buflen < rbuf_len) {
reduced = true;
SG_LOG(2, sfp,
"%s: reserve reduced from %d to buflen=%d\n",
- __func__, rbuf_len, srp->sgat_h.buflen);
+ __func__, rbuf_len, srp->sgatp->buflen);
}
xa_lock_irqsave(xafp, iflags);
res = __xa_alloc(xafp, &idx, srp, xa_limit_32b, GFP_ATOMIC);
+ if (!res) {
+ srp->rq_idx = idx;
+ srp->parentfp = sfp;
+ sg_rq_chg_state_force_ulck(srp, SG_RQ_INACTIVE);
+ atomic_inc(&sfp->inactives);
+ }
xa_unlock_irqrestore(xafp, iflags);
if (res < 0) {
SG_LOG(1, sfp, "%s: xa_alloc(srp) bad, errno=%d\n",
@@ -4739,10 +5284,6 @@ sg_add_sfp(struct sg_device *sdp, struct file *filp)
kfree(sfp);
return ERR_PTR(-EPROTOTYPE);
}
- srp->rq_idx = idx;
- srp->parentfp = sfp;
- sg_rq_chg_state_force(srp, SG_RS_INACTIVE);
- atomic_inc(&sfp->inactives);
}
if (!reduced) {
SG_LOG(4, sfp, "%s: built reserve buflen=%d\n", __func__,
@@ -4802,7 +5343,7 @@ sg_remove_sfp_usercontext(struct work_struct *work)
xa_for_each(xafp, idx, srp) {
if (!xa_get_mark(xafp, srp->rq_idx, SG_XA_RQ_INACTIVE))
sg_finish_scsi_blk_rq(srp);
- if (srp->sgat_h.buflen > 0)
+ if (srp->sgatp->buflen > 0)
sg_remove_sgat(srp);
if (srp->sense_bp) {
mempool_free(srp->sense_bp, sg_sense_pool);
@@ -4842,29 +5383,14 @@ static void
sg_remove_sfp(struct kref *kref)
{
struct sg_fd *sfp = container_of(kref, struct sg_fd, f_ref);
- struct sg_device *sdp = sfp->parentdp;
- struct xarray *xap = &sdp->sfp_arr;
-
- if (!xa_get_mark(xap, sfp->idx, SG_XA_FD_UNSHARED)) {
- struct sg_fd *o_sfp;
-
- o_sfp = sg_fd_shared_ptr(sfp);
- if (o_sfp && !test_bit(SG_FFD_RELEASE, o_sfp->ffd_bm) &&
- !xa_get_mark(xap, sfp->idx, SG_XA_FD_UNSHARED)) {
- mutex_lock(&o_sfp->f_mutex);
- sg_remove_sfp_share
- (sfp, xa_get_mark(xap, sfp->idx,
- SG_XA_FD_RS_SHARE));
- mutex_unlock(&o_sfp->f_mutex);
- }
- }
+
INIT_WORK(&sfp->ew_fd.work, sg_remove_sfp_usercontext);
schedule_work(&sfp->ew_fd.work);
}
-/* must be called with sg_index_lock held */
static struct sg_device *
sg_lookup_dev(int dev)
+ __must_hold(&sg_index_lock)
{
return idr_find(&sg_index_idr, dev);
}
@@ -4899,14 +5425,37 @@ static const char *
sg_rq_st_str(enum sg_rq_state rq_st, bool long_str)
{
switch (rq_st) { /* request state */
- case SG_RS_INACTIVE:
+ case SG_RQ_INACTIVE:
return long_str ? "inactive" : "ina";
- case SG_RS_INFLIGHT:
+ case SG_RQ_INFLIGHT:
return long_str ? "inflight" : "act";
- case SG_RS_AWAIT_RCV:
+ case SG_RQ_AWAIT_RCV:
return long_str ? "await_receive" : "rcv";
- case SG_RS_BUSY:
+ case SG_RQ_BUSY:
return long_str ? "busy" : "bsy";
+ case SG_RQ_SHR_SWAP: /* only an active read-side has this */
+ return long_str ? "share swap" : "s_wp";
+ case SG_RQ_SHR_IN_WS: /* only an active read-side has this */
+ return long_str ? "share write-side active" : "ws_a";
+ default:
+ return long_str ? "unknown" : "unk";
+ }
+}
+
+static const char *
+sg_shr_str(enum sg_shr_var sh_var, bool long_str)
+{
+ switch (sh_var) { /* share variety of request */
+ case SG_SHR_NONE:
+ return long_str ? "none" : "-";
+ case SG_SHR_RS_RQ:
+ return long_str ? "read-side request" : "rs_rq";
+ case SG_SHR_RS_NOT_SRQ:
+ return long_str ? "read-side, not share request" : "rs_nsh";
+ case SG_SHR_WS_RQ:
+ return long_str ? "write-side request" : "ws_rq";
+ case SG_SHR_WS_NOT_SRQ:
+ return long_str ? "write-side, not share request" : "ws_nsh";
default:
return long_str ? "unknown" : "unk";
}
@@ -4919,6 +5468,12 @@ sg_rq_st_str(enum sg_rq_state rq_st, bool long_str)
{
return "";
}
+
+static const char *
+sg_shr_str(enum sg_shr_var sh_var, bool long_str)
+{
+ return "";
+}
#endif
#if IS_ENABLED(SG_PROC_OR_DEBUG_FS)
@@ -4935,8 +5490,8 @@ static struct sg_dfs_context_t {
} sg_dfs_cxt;
struct sg_proc_deviter {
- loff_t index;
- size_t max;
+ loff_t index;
+ size_t max;
int fd_index;
};
@@ -4963,7 +5518,7 @@ dev_seq_start(struct seq_file *s, loff_t *pos)
it->index = *pos;
it->max = sg_last_dev();
- if (it->index >= it->max)
+ if (it->index >= (int)it->max)
return NULL;
return it;
}
@@ -5040,7 +5595,7 @@ sg_proc_write_dressz(struct file *filp, const char __user *buffer,
sg_big_buff = k;
return count;
}
- return -ERANGE;
+ return -EDOM;
}
static int
@@ -5074,7 +5629,7 @@ sg_proc_seq_show_dev(struct seq_file *s, void *v)
scsidp = sdp->device;
seq_printf(s, "%d\t%d\t%d\t%llu\t%d\t%d\t%d\t%d\t%d\n",
scsidp->host->host_no, scsidp->channel,
- scsidp->id, scsidp->lun, (int) scsidp->type,
+ scsidp->id, scsidp->lun, (int)scsidp->type,
1,
(int) scsidp->queue_depth,
(int) scsi_device_busy(scsidp),
@@ -5133,8 +5688,8 @@ sg_proc_debug_sreq(struct sg_request *srp, int to, bool t_in_ns, char *obp,
rq_st = atomic_read(&srp->rq_st);
dur = sg_get_dur(srp, &rq_st, t_in_ns, &is_dur);
n += scnprintf(obp + n, len - n, "%s%s: dlen=%d/%d id=%d", cp,
- sg_rq_st_str(rq_st, false), srp->sgat_h.dlen,
- srp->sgat_h.buflen, (int)srp->pack_id);
+ sg_rq_st_str(rq_st, false), srp->sgatp->dlen,
+ srp->sgatp->buflen, (int)srp->pack_id);
if (is_dur) /* cmd/req has completed, waiting for ... */
n += scnprintf(obp + n, len - n, " dur=%u%s", dur, tp);
else if (dur < U32_MAX) { /* in-flight or busy (so ongoing) */
@@ -5145,9 +5700,12 @@ sg_proc_debug_sreq(struct sg_request *srp, int to, bool t_in_ns, char *obp,
n += scnprintf(obp + n, len - n, " t_o/elap=%us/%u%s",
to / 1000, dur, tp);
}
+ if (srp->sh_var != SG_SHR_NONE)
+ n += scnprintf(obp + n, len - n, " shr=%s",
+ sg_shr_str(srp->sh_var, false));
cp = (srp->rq_flags & SGV4_FLAG_HIPRI) ? "hipri " : "";
n += scnprintf(obp + n, len - n, " sgat=%d %sop=0x%02x\n",
- srp->sgat_h.num_sgat, cp, srp->cmd_opcode);
+ srp->sgatp->num_sgat, cp, srp->cmd_opcode);
return n;
}
@@ -5160,8 +5718,15 @@ sg_proc_debug_fd(struct sg_fd *fp, char *obp, int len, unsigned long idx,
int n = 0;
int to, k;
unsigned long iflags;
+ const char *cp;
struct sg_request *srp;
+ struct sg_device *sdp = fp->parentdp;
+ if (xa_get_mark(&sdp->sfp_arr, fp->idx, SG_XA_FD_UNSHARED))
+ cp = "";
+ else
+ cp = xa_get_mark(&sdp->sfp_arr, fp->idx, SG_XA_FD_RS_SHARE) ?
+ " shr_rs" : " shr_ws";
/* sgat=-1 means unavailable */
to = (fp->timeout >= 0) ? jiffies_to_msecs(fp->timeout) : -999;
if (to < 0)
@@ -5171,8 +5736,8 @@ sg_proc_debug_fd(struct sg_fd *fp, char *obp, int len, unsigned long idx,
n += scnprintf(obp + n, len - n, "timeout=%dms rs", to);
else
n += scnprintf(obp + n, len - n, "timeout=%ds rs", to / 1000);
- n += scnprintf(obp + n, len - n, "v_buflen=%d idx=%lu\n cmd_q=%d ",
- fp->rsv_srp->sgat_h.buflen, idx,
+ n += scnprintf(obp + n, len - n, "v_buflen=%d%s idx=%lu\n cmd_q=%d ",
+ fp->rsv_srp->sgatp->buflen, cp, idx,
(int)test_bit(SG_FFD_CMD_Q, fp->ffd_bm));
n += scnprintf(obp + n, len - n,
"f_packid=%d k_orphan=%d ffd_bm=0x%lx\n",
@@ -5311,10 +5876,10 @@ sg_proc_seq_show_debug(struct seq_file *s, void *v, bool reduced)
if (!xa_empty(&sdp->sfp_arr)) {
found = true;
disk_name = (sdp->disk ? sdp->disk->disk_name : "?_?");
- if (SG_IS_DETACHING(sdp))
+ if (SG_IS_DETACHING(sdp)) {
snprintf(b1, sizeof(b1), " >>> device=%s %s\n",
disk_name, "detaching pending close\n");
- else if (sdp->device) {
+ } else if (sdp->device) {
n = sg_proc_debug_sdev(sdp, bp, bp_len, fdi_p,
reduced);
if (n >= bp_len - 1) {
@@ -112,6 +112,9 @@ typedef struct sg_io_hdr {
#define SGV4_FLAG_Q_AT_HEAD SG_FLAG_Q_AT_HEAD
#define SGV4_FLAG_IMMED 0x400 /* for polling with SG_IOR, ignored in SG_IOS */
#define SGV4_FLAG_HIPRI 0x800 /* request will use blk_poll to complete */
+#define SGV4_FLAG_DEV_SCOPE 0x1000 /* permit SG_IOABORT to have wider scope */
+#define SGV4_FLAG_SHARE 0x2000 /* share IO buffer; needs SG_SEIM_SHARE_FD */
+#define SGV4_FLAG_NO_DXFER SG_FLAG_NO_DXFER /* but keep dev<-->kernel xfr */
/* Output (potentially OR-ed together) in v3::info or v4::info field */
#define SG_INFO_OK_MASK 0x1
@@ -184,7 +187,12 @@ typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
#define SG_CTL_FLAGM_OTHER_OPENS 0x4 /* rd: other sg fd_s on this dev */
#define SG_CTL_FLAGM_ORPHANS 0x8 /* rd: orphaned requests on this fd */
#define SG_CTL_FLAGM_Q_TAIL 0x10 /* used for future cmds on this fd */
+#define SG_CTL_FLAGM_IS_SHARE 0x20 /* rd: fd is read-side or write-side share */
+#define SG_CTL_FLAGM_IS_READ_SIDE 0x40 /* rd: this fd is read-side share */
#define SG_CTL_FLAGM_UNSHARE 0x80 /* undo share after inflight cmd */
+/* rd: 1: read-side finished; 0: not. wr: 1: finish share after read-side */
+#define SG_CTL_FLAGM_READ_SIDE_FINI 0x100 /* wr: 0: setup for repeat write-side req */
+#define SG_CTL_FLAGM_READ_SIDE_ERR 0x200 /* rd: sharing, read-side got error */
#define SG_CTL_FLAGM_NO_DURATION 0x400 /* don't calc command duration */
#define SG_CTL_FLAGM_MORE_ASYNC 0x800 /* yield EAGAIN in more cases */
#define SG_CTL_FLAGM_ALL_BITS 0xfff /* should be OR of previous items */
Add request sharing, which is invoked on a shared file descriptor by
using SGV4_FLAG_SHARE. The file share is asymmetric: the read-side is
assumed to issue a data-in command (e.g. READ) first, followed by the
write-side issuing a data-out command (e.g. WRITE). The read-side may
also set SG_FLAG_NO_DXFER and the write-side must set that flag. If
both sides set that flag then a single bio is used and user space
doesn't "see" the data. If the read-side does not set SG_FLAG_NO_DXFER
then the read data is copied to user space. That copy to user space
can be replaced by using SG_FLAG_MMAP_IO (but that adds some other
overheads). See the webpage at: https://sg.danny.cz/sg/sg_v40.html in
the section titled: "8 Request sharing".

Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
---
 drivers/scsi/sg.c      | 1203 +++++++++++++++++++++++++++++-----------
 include/uapi/scsi/sg.h |    8 +
 2 files changed, 892 insertions(+), 319 deletions(-)
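For reference, below is a minimal user space sketch of one copy
segment under request sharing. Only SG_SET_GET_EXTENDED,
SG_SEIM_SHARE_FD, SGV4_FLAG_SHARE and SG_FLAG_NO_DXFER are named by
this patch; the struct sg_extended_info field names (sei_wr_mask,
share_fd) and issuing a struct sg_io_v4 (guard 'Q') through
ioctl(SG_IO) are assumptions based on the rest of this driver rework.
CDB construction and error handling are elided.

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/bsg.h>
	#include <scsi/sg.h>

	/* write-side fd names its read-side partner, forming the fd share */
	static int share_fds(int rs_fd, int ws_fd)
	{
		struct sg_extended_info sei;

		memset(&sei, 0, sizeof(sei));
		sei.sei_wr_mask = SG_SEIM_SHARE_FD;
		sei.share_fd = rs_fd;	/* assumed field name */
		return ioctl(ws_fd, SG_SET_GET_EXTENDED, &sei);
	}

	/* READ on the read-side; with NO_DXFER the data stays in the
	 * read-side reserve request's buffer, unseen by user space */
	static int read_side_io(int rs_fd, const uint8_t *cdb, int cdb_len,
				uint32_t dlen)
	{
		struct sg_io_v4 h;

		memset(&h, 0, sizeof(h));
		h.guard = 'Q';
		h.protocol = BSG_PROTOCOL_SCSI;
		h.subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD;
		h.request = (uintptr_t)cdb;
		h.request_len = cdb_len;
		h.din_xfer_len = dlen;	/* no din_xferp needed */
		h.flags = SGV4_FLAG_SHARE | SG_FLAG_NO_DXFER;
		return ioctl(rs_fd, SG_IO, &h);	/* synchronous for brevity */
	}

	/* WRITE on the write-side; dout comes from the shared buffer, so
	 * NO_DXFER is mandatory and dlen must not exceed the read-side's */
	static int write_side_io(int ws_fd, const uint8_t *cdb, int cdb_len,
				 uint32_t dlen)
	{
		struct sg_io_v4 h;

		memset(&h, 0, sizeof(h));
		h.guard = 'Q';
		h.protocol = BSG_PROTOCOL_SCSI;
		h.subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD;
		h.request = (uintptr_t)cdb;
		h.request_len = cdb_len;
		h.dout_xfer_len = dlen;
		h.flags = SGV4_FLAG_SHARE | SG_FLAG_NO_DXFER;
		return ioctl(ws_fd, SG_IO, &h);
	}

The calls would be made in the order share_fds(), read_side_io(),
write_side_io(), repeating the last two per copy segment. Between
segments, SG_CTL_FLAGM_READ_SIDE_FINI appears intended to let the
write-side be repeated against the same read data (write 0) or to
finish the share after the read-side completes (write 1).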