// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Li Guifu <bluce.liguifu@huawei.com>
 */
#ifndef _LARGEFILE64_SOURCE
#define _LARGEFILE64_SOURCE
#endif
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include "erofs/internal.h"
#ifdef HAVE_LINUX_FS_H
#include <linux/fs.h>
#endif
#ifdef HAVE_LINUX_FALLOC_H
#include <linux/falloc.h>
#endif
#ifdef HAVE_SYS_STATFS_H
#include <sys/statfs.h>
#endif
#define EROFS_MODNAME	"erofs_io"
#include "erofs/print.h"

/*
 * Retrieve file status for a virtual file.
 *
 * In dry-run mode no file is consulted: only st_size (0) and st_mode
 * (regular file, 0777) of @buf are filled in.  Otherwise the request goes
 * to the backend's fstat hook when an ops table is installed, or to
 * fstat(2) on the underlying descriptor.
 *
 * Returns 0 in dry-run mode, else whatever the hook / fstat(2) returns.
 */
int erofs_io_fstat(struct erofs_vfile *vf, struct stat *buf)
{
	if (__erofs_unlikely(cfg.c_dry_run)) {
		buf->st_mode = S_IFREG | 0777;
		buf->st_size = 0;
		return 0;
	}

	return vf->ops ? vf->ops->fstat(vf, buf) : fstat(vf->fd, buf);
}

/*
 * Write @len bytes from @buf to the virtual file at position @pos.
 *
 * Dry-run mode writes nothing and returns 0.  When an ops table is
 * installed the request is handed to its pwrite hook unchanged.  Otherwise
 * the data is written to vf->fd at @pos + vf->offset, retrying after EINTR
 * and continuing after short writes until everything is written or the
 * underlying call reports 0 bytes.
 *
 * Returns the number of bytes written (may be short only if the underlying
 * call returned 0), or a negative errno value on failure.
 */
ssize_t erofs_io_pwrite(struct erofs_vfile *vf, const void *buf,
			u64 pos, size_t len)
{
	const char *src = buf;
	size_t written = 0;

	if (__erofs_unlikely(cfg.c_dry_run))
		return 0;

	if (vf->ops)
		return vf->ops->pwrite(vf, buf, pos, len);

	pos += vf->offset;
	do {
		ssize_t ret;

		/*
		 * Only the remaining bytes may be submitted: passing the
		 * full @len again after a short write would read past the
		 * end of the caller's buffer.
		 */
#ifdef HAVE_PWRITE64
		ret = pwrite64(vf->fd, src + written, len - written,
			       (off64_t)(pos + written));
#else
		ret = pwrite(vf->fd, src + written, len - written,
			     (off_t)(pos + written));
#endif
		if (ret < 0) {
			/* capture errno before logging can disturb it */
			int err = errno;

			if (err == EINTR)
				continue;
			erofs_err("failed to write: %s", strerror(err));
			return -err;
		}
		if (!ret)
			break;
		written += ret;
	} while (written < len);

	return (ssize_t)written;
}

/*
 * Flush a virtual file to stable storage.
 *
 * A no-op returning 0 in dry-run mode.  Delegates to the backend's fsync
 * hook when an ops table is installed, otherwise calls fsync(2) on vf->fd.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
int erofs_io_fsync(struct erofs_vfile *vf)
{
	if (__erofs_unlikely(cfg.c_dry_run))
		return 0;

	if (vf->ops)
		return vf->ops->fsync(vf);

	if (fsync(vf->fd)) {
		/* capture errno before logging can disturb it */
		int err = errno;

		erofs_err("failed to fsync(!): %s", strerror(err));
		return -err;
	}
	return 0;
}

/*
 * Make the range [@offset, @offset + @len) of the virtual file read back
 * as zeroes.
 *
 * A no-op returning 0 in dry-run mode; delegated to the backend's
 * fallocate hook when an ops table is installed.  Otherwise, unless
 * @zeroout is set, hole punching via fallocate(2) is attempted first
 * (when available at build time); if that is skipped or fails, zeroes are
 * written explicitly in chunks of at most EROFS_MAX_BLOCK_SIZE bytes.
 *
 * Returns 0 on success, a negative errno value propagated from
 * erofs_io_pwrite() for the chunked part, or -EIO if a write makes no
 * progress or the final chunk is not written completely.
 */
ssize_t erofs_io_fallocate(struct erofs_vfile *vf, u64 offset,
			   size_t len, bool zeroout)
{
	static const char zero[EROFS_MAX_BLOCK_SIZE] = {0};
	ssize_t ret;

	if (__erofs_unlikely(cfg.c_dry_run))
		return 0;

	if (vf->ops)
		return vf->ops->fallocate(vf, offset, len, zeroout);

#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE)
	if (!zeroout && fallocate(vf->fd, FALLOC_FL_PUNCH_HOLE |
		    FALLOC_FL_KEEP_SIZE, offset + vf->offset, len) >= 0)
		return 0;
#endif
	while (len > EROFS_MAX_BLOCK_SIZE) {
		ret = erofs_io_pwrite(vf, zero, offset, EROFS_MAX_BLOCK_SIZE);
		if (ret < 0)
			return ret;
		/* a zero-byte write would otherwise spin here forever */
		if (!ret)
			return -EIO;
		len -= ret;
		offset += ret;
	}
	return erofs_io_pwrite(vf, zero, offset, len) == len ? 0 : -EIO;
}

/*
 * Set the size of a virtual file to @length bytes.
 *
 * A no-op returning 0 in dry-run mode; delegated to the backend's
 * ftruncate hook when an ops table is installed.  Otherwise @length is
 * shifted by vf->offset and applied with ftruncate(2), except that block
 * devices and files already at the requested size are left untouched.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
int erofs_io_ftruncate(struct erofs_vfile *vf, u64 length)
{
	struct stat st;

	if (__erofs_unlikely(cfg.c_dry_run))
		return 0;

	if (vf->ops)
		return vf->ops->ftruncate(vf, length);

	if (fstat(vf->fd, &st)) {
		/* capture errno before logging can disturb it */
		int err = errno;

		erofs_err("failed to fstat: %s", strerror(err));
		return -err;
	}
	length += vf->offset;
	if (S_ISBLK(st.st_mode) || st.st_size == length)
		return 0;
	/* report failures as -errno, like the fstat path above */
	if (ftruncate(vf->fd, length))
		return -errno;
	return 0;
}

/*
 * Read up to @len bytes from the virtual file at position @pos into @buf.
 *
 * Dry-run mode reads nothing and returns 0.  When an ops table is
 * installed the request is handed to its pread hook unchanged.  Otherwise
 * data is read from vf->fd at @pos + vf->offset, retrying after EINTR and
 * continuing after short reads until @len bytes are read or end-of-file
 * is reached.
 *
 * Returns the number of bytes read (short only at end-of-file), or a
 * negative errno value on failure.
 */
ssize_t erofs_io_pread(struct erofs_vfile *vf, void *buf, u64 pos, size_t len)
{
	char *dst = buf;
	size_t done = 0;

	if (__erofs_unlikely(cfg.c_dry_run))
		return 0;

	if (vf->ops)
		return vf->ops->pread(vf, buf, pos, len);

	pos += vf->offset;
	do {
		ssize_t ret;

		/*
		 * Only request what is still missing: asking for the full
		 * @len again after a short read would overflow @buf.
		 */
#ifdef HAVE_PREAD64
		ret = pread64(vf->fd, dst + done, len - done,
			      (off64_t)(pos + done));
#else
		ret = pread(vf->fd, dst + done, len - done,
			    (off_t)(pos + done));
#endif
		if (ret < 0) {
			/* capture errno before logging can disturb it */
			int err = errno;

			if (err == EINTR)
				continue;
			erofs_err("failed to read: %s", strerror(err));
			return -err;
		}
		if (!ret)
			break;
		done += ret;
	} while (done < len);

	return (ssize_t)done;
}

/*
 * Query the size of the block device behind @fd and store it in *@bytes.
 *
 * BLKGETSIZE64 is tried first, then BLKGETSIZE (whose result is shifted
 * left by 9 to convert it to bytes), each only if defined at build time.
 *
 * Returns 0 on success, otherwise -errno; errno is preset to ENOTSUP so
 * that this is what gets reported when neither ioctl is compiled in.
 */
static int erofs_get_bdev_size(int fd, u64 *bytes)
{
	errno = ENOTSUP;

#ifdef BLKGETSIZE64
	if (!(ioctl(fd, BLKGETSIZE64, bytes) < 0))
		return 0;
#endif

#ifdef BLKGETSIZE
	do {
		unsigned long nr_sectors;

		if (ioctl(fd, BLKGETSIZE, &nr_sectors) < 0)
			break;
		*bytes = (u64)nr_sectors << 9;
		return 0;
	} while (0);
#endif
	return -errno;
}

#if defined(__linux__) && !defined(BLKDISCARD)
#define BLKDISCARD	_IO(0x12, 119)
#endif

/*
 * Issue a BLKDISCARD ioctl on the block device behind @fd.
 *
 * @block and @count are passed to the ioctl as-is as a two-element u64
 * array.
 *
 * Returns 0 on success, -errno if the ioctl fails, or -EOPNOTSUPP when
 * BLKDISCARD is not available at build time.
 */
static int erofs_bdev_discard(int fd, u64 block, u64 count)
{
#ifdef BLKDISCARD
	u64 range[2] = { block, count };

	/*
	 * ioctl(2) reports failure as -1; convert to -errno so callers can
	 * hand the result to an error-string helper.
	 */
	if (ioctl(fd, BLKDISCARD, &range) < 0)
		return -errno;
	return 0;
#else
	return -EOPNOTSUPP;
#endif
}

/*
 * Open the primary device/image @dev and attach it to @sbi.
 *
 * @flags uses open(2) semantics: the access mode selects read-only versus
 * read-write (the latter also creates the file, mode 0644), and O_TRUNC
 * requests that existing contents be discarded:
 *  - block devices: the size is queried into sbi->devsz, rounded down to
 *    the block size, and the device is discarded (a discard failure is
 *    only logged);
 *  - regular files: a non-empty file is truncated to zero, or, on the
 *    filesystems matched below, unlinked and re-created instead;
 *    sbi->devblksz is set from st_blksize;
 *  - any other file type is rejected with -EINVAL.
 *
 * On success sbi->devname holds a heap copy of @dev and sbi->bdev.fd the
 * open descriptor.  Returns 0 on success or a negative errno value.
 */
int erofs_dev_open(struct erofs_sb_info *sbi, const char *dev, int flags)
{
	bool ro = (flags & O_ACCMODE) == O_RDONLY;
	bool truncate = flags & O_TRUNC;
	struct stat st;
	int fd, ret, err;

#if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS)
	bool again = false;

repeat:
#endif
	fd = open(dev, (ro ? O_RDONLY : O_RDWR | O_CREAT) | O_BINARY, 0644);
	if (fd < 0) {
		/* capture errno before logging can disturb it */
		err = errno;
		erofs_err("failed to open %s: %s", dev, strerror(err));
		return -err;
	}

	if (ro || !truncate)
		goto out;

	ret = fstat(fd, &st);
	if (ret) {
		/* capture errno before logging/close() can disturb it */
		err = errno;
		erofs_err("failed to fstat(%s): %s", dev, strerror(err));
		close(fd);
		return -err;
	}

	switch (st.st_mode & S_IFMT) {
	case S_IFBLK:
		ret = erofs_get_bdev_size(fd, &sbi->devsz);
		if (ret) {
			/* ret is already -errno from the helper */
			erofs_err("failed to get block device size(%s): %s",
				  dev, strerror(-ret));
			close(fd);
			return ret;
		}
		sbi->devsz = round_down(sbi->devsz, erofs_blksiz(sbi));
		ret = erofs_bdev_discard(fd, 0, sbi->devsz);
		/* best effort: a failed discard does not abort the open */
		if (ret)
			erofs_err("failed to erase block device(%s): %s",
				  dev, erofs_strerror(ret));
		break;
	case S_IFREG:
		if (st.st_size) {
#if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS)
			struct statfs stfs;

			/* still non-empty after unlink + re-create: give up */
			if (again) {
				close(fd);
				return -ENOTEMPTY;
			}

			/*
			 * Filesystems like EXT4 and BTRFS will flush dirty
			 * blocks after truncate(0) even after the writeback
			 * happens (see kernel commits 7d8f9f7d150d and
			 * ccd2506bd431), which is NOT our intention.  Work
			 * around this by unlinking and re-creating the file.
			 */
			if (!fstatfs(fd, &stfs) && (stfs.f_type == 0xEF53 ||
					stfs.f_type == 0x9123683E)) {
				close(fd);
				unlink(dev);
				again = true;
				goto repeat;
			}
#endif
			ret = ftruncate(fd, 0);
			if (ret) {
				/* capture errno before logging/close() */
				err = errno;
				erofs_err("failed to ftruncate(%s).", dev);
				close(fd);
				return -err;
			}
		}
		sbi->devblksz = st.st_blksize;
		break;
	default:
		erofs_err("bad file type (%s, %o).", dev, st.st_mode);
		close(fd);
		return -EINVAL;
	}

out:
	sbi->devname = strdup(dev);
	if (!sbi->devname) {
		close(fd);
		return -ENOMEM;
	}
	sbi->bdev.fd = fd;
	erofs_info("successfully to open %s", dev);
	return 0;
}

/*
 * Detach the primary device from @sbi.
 *
 * The descriptor is closed only when no ops table is installed on
 * sbi->bdev.  The stored device name is freed, and both the name pointer
 * and the descriptor field are reset (NULL / -1).
 */
void erofs_dev_close(struct erofs_sb_info *sbi)
{
	if (sbi->bdev.ops == NULL)
		close(sbi->bdev.fd);
	sbi->bdev.fd = -1;

	free(sbi->devname);
	sbi->devname = NULL;
}

/*
 * Close every blob descriptor recorded in @sbi, in the order they were
 * opened, and reset the blob count to zero.
 */
void erofs_blob_closeall(struct erofs_sb_info *sbi)
{
	unsigned int idx = 0;

	while (idx < sbi->nblobs) {
		close(sbi->blobfd[idx]);
		idx++;
	}
	sbi->nblobs = 0;
}

/*
 * Open @dev read-only and append its descriptor to @sbi's blob table.
 *
 * Returns 0 on success or a negative errno value if open(2) fails.
 *
 * NOTE(review): the capacity of sbi->blobfd is not checked here; confirm
 * that callers bound the number of blobs they open.
 */
int erofs_blob_open_ro(struct erofs_sb_info *sbi, const char *dev)
{
	int fd = open(dev, O_RDONLY | O_BINARY);

	if (fd < 0) {
		/* capture errno before logging can disturb it */
		int err = errno;

		erofs_err("failed to open(%s).", dev);
		return -err;
	}

	sbi->blobfd[sbi->nblobs] = fd;
	erofs_info("successfully to open blob%u %s", sbi->nblobs, dev);
	++sbi->nblobs;
	return 0;
}

/*
 * Read exactly @len bytes at @offset from one of @sbi's devices into @buf.
 *
 * @device_id 0 selects the primary device (sbi->bdev); ids 1..nblobs
 * select the extra blobs, stored at sbi->blobfd[device_id - 1].  If the
 * device ends before @len bytes are available, the remainder of @buf is
 * filled with zeroes.
 *
 * Returns 0 on success (not a byte count), -EIO for an invalid
 * @device_id, or a negative errno value propagated from erofs_io_pread().
 */
ssize_t erofs_dev_read(struct erofs_sb_info *sbi, int device_id,
		       void *buf, u64 offset, size_t len)
{
	ssize_t read;

	if (device_id) {
		/*
		 * Blob ids are 1-based, so id == nblobs is the last valid
		 * blob; only ids beyond that (or negative ones) are bogus.
		 */
		if (device_id < 0 ||
		    (unsigned int)device_id > sbi->nblobs) {
			erofs_err("invalid device id %d", device_id);
			return -EIO;
		}
		read = erofs_io_pread(&((struct erofs_vfile) {
				.fd = sbi->blobfd[device_id - 1],
			}), buf, offset, len);
	} else {
		read = erofs_io_pread(&sbi->bdev, buf, offset, len);
	}

	if (read < 0)
		return read;
	if ((size_t)read < len) {
		erofs_info("reach EOF of device @ %llu, pading with zeroes",
			   offset | 0ULL);
		memset((char *)buf + read, 0, len - read);
	}
	return 0;
}

/*
 * Userspace fallback for copying @length bytes from @fd_in at *@off_in to
 * @fd_out at *@off_out, using positional reads and writes through an
 * 8192-byte stack buffer.
 *
 * *@off_in and *@off_out are advanced by the amount actually consumed /
 * written; after a write error *@off_in is rewound so that it matches
 * what was written.
 *
 * Returns the number of bytes copied, which is short if end-of-file is
 * hit on the input or if an error occurs after some data was already
 * copied.  Returns -1 if a read or write fails before anything was
 * copied (errno is not translated here).
 */
static ssize_t __erofs_copy_file_range(int fd_in, u64 *off_in,
				       int fd_out, u64 *off_out,
				       size_t length)
{
	size_t copied = 0;
	char buf[8192];

	/*
	 * Main copying loop.  The buffer size is arbitrary and is a
	 * trade-off between stack size consumption, cache usage, and
	 * amortization of system call overhead.
	 */
	while (length > 0) {
		size_t to_read;
		ssize_t read_count;
		char *end, *p;

		to_read = min_t(size_t, length, sizeof(buf));
#ifdef HAVE_PREAD64
		read_count = pread64(fd_in, buf, to_read, *off_in);
#else
		read_count = pread(fd_in, buf, to_read, *off_in);
#endif
		if (read_count == 0)
			/* End of file reached prematurely. */
			return copied;
		if (read_count < 0) {
			/* Report the number of bytes copied so far. */
			if (copied > 0)
				return copied;
			return -1;
		}
		*off_in += read_count;

		/* Write the buffer part which was read to the destination. */
		end = buf + read_count;
		for (p = buf; p < end; ) {
			ssize_t write_count;

#ifdef HAVE_PWRITE64
			write_count = pwrite64(fd_out, p, end - p, *off_out);
#else
			write_count = pwrite(fd_out, p, end - p, *off_out);
#endif
			if (write_count < 0) {
				/*
				 * Adjust the input read position to match what
				 * we have written, so that the caller can pick
				 * up after the error.
				 */
				size_t written = p - buf;
				/*
				 * NB: This needs to be signed so that we can
				 * form the negative value below.
				 */
				ssize_t overread = read_count - written;

				*off_in -= overread;
				/* Report the number of bytes copied so far. */
				if (copied + written > 0)
					return copied + written;
				return -1;
			}
			p += write_count;
			*off_out += write_count;
		} /* Write loop.  */
		copied += read_count;
		length -= read_count;
	}
	return copied;
}

/*
 * Copy @length bytes from @fd_in at *@off_in to @fd_out at *@off_out.
 *
 * When copy_file_range(2) is available at build time it is tried first;
 * its result (byte count, or -errno on failure) is returned together with
 * the updated offsets.  Only when it fails with ENOSYS or EXDEV, or when
 * it is not available at all, does the userspace fallback
 * __erofs_copy_file_range() run instead.
 */
ssize_t erofs_copy_file_range(int fd_in, u64 *off_in, int fd_out, u64 *off_out,
			      size_t length)
{
#ifdef HAVE_COPY_FILE_RANGE
	off64_t in_pos = *off_in, out_pos = *off_out;
	ssize_t nr = copy_file_range(fd_in, &in_pos, fd_out, &out_pos,
				     length, 0);
	bool use_fallback = nr < 0 && (errno == ENOSYS || errno == EXDEV);

	if (!use_fallback) {
		if (nr < 0)
			nr = -errno;
		/* publish whatever offsets the kernel left behind */
		*off_in = in_pos;
		*off_out = out_pos;
		return nr;
	}
#endif
	return __erofs_copy_file_range(fd_in, off_in, fd_out, off_out, length);
}

/*
 * Sequentially read up to @bytes bytes from the virtual file into @buf.
 *
 * Delegates to the backend's read hook when an ops table is installed.
 * Otherwise read(2) is called on vf->fd (at most INT_MAX bytes per call),
 * retrying after EINTR, until @bytes bytes are read or end-of-file is hit.
 *
 * Returns the number of bytes read (short only at end-of-file), or a
 * negative errno value on failure.
 */
ssize_t erofs_io_read(struct erofs_vfile *vf, void *buf, size_t bytes)
{
	char *dst = buf;
	ssize_t total = 0;

	if (vf->ops)
		return vf->ops->read(vf, buf, bytes);

	while (bytes) {
		size_t chunk = bytes > INT_MAX ? INT_MAX : bytes;
		ssize_t ret = read(vf->fd, dst + total, chunk);

		if (ret < 0) {
			/* capture errno before logging can disturb it */
			int err = errno;

			/*
			 * Retry without touching the counters: folding the
			 * -1 into them would corrupt both the remaining
			 * byte count and the buffer position.
			 */
			if (err == EINTR)
				continue;
			erofs_err("failed to read : %s", strerror(err));
			return -err;
		}
		if (!ret)
			break;
		bytes -= ret;
		total += ret;
	}
	return total;
}

#ifdef HAVE_SYS_SENDFILE_H
#include <sys/sendfile.h>
#endif

/*
 * Reposition the file offset of a virtual file.
 *
 * Uses lseek(2) on vf->fd when no ops table is installed; otherwise the
 * backend's lseek hook handles the request.  The result of whichever was
 * called is returned unchanged.
 */
off_t erofs_io_lseek(struct erofs_vfile *vf, u64 offset, int whence)
{
	if (!vf->ops)
		return lseek(vf->fd, offset, whence);

	return vf->ops->lseek(vf, offset, whence);
}

/*
 * Copy @len bytes from the current position of @vin to @vout at @pos.
 *
 * Delegates to @vout's xcopy hook when it has an ops table.  Otherwise,
 * if @vin is a plain descriptor too, in-kernel copying is attempted
 * first: copy_file_range(2), then (unless @noseek is set) lseek(2) +
 * sendfile(2), each only if available at build time.  Whatever remains is
 * copied through a 32 KiB stack buffer with erofs_io_read() and
 * erofs_io_pwrite().
 *
 * Returns 0 once @len bytes are copied, a negative errno value propagated
 * from the read/write helpers, or -EIO if @vin reaches end-of-file before
 * @len bytes were copied.
 *
 * NOTE(review): a 0 return from erofs_io_pwrite() (as it gives in dry-run
 * mode) does not reduce @len, so the loop keeps reading from @vin until
 * end-of-file; confirm whether this path is reachable in dry-run mode.
 */
int erofs_io_xcopy(struct erofs_vfile *vout, off_t pos,
		   struct erofs_vfile *vin, unsigned int len, bool noseek)
{
	if (vout->ops)
		return vout->ops->xcopy(vout, pos, vin, len, noseek);

	if (len && !vin->ops) {
		off_t ret __maybe_unused;

#ifdef HAVE_COPY_FILE_RANGE
		ret = copy_file_range(vin->fd, NULL, vout->fd, &pos, len, 0);
		if (ret > 0)
			len -= ret;
#endif
#if defined(HAVE_SYS_SENDFILE_H) && defined(HAVE_SENDFILE)
		if (len && !noseek) {
			/* sendfile(2) writes at the output's file offset */
			ret = lseek(vout->fd, pos, SEEK_SET);
			if (ret == pos) {
				ret = sendfile(vout->fd, vin->fd, NULL, len);
				if (ret > 0) {
					pos += ret;
					len -= ret;
				}
			}
		}
#endif
	}

	do {
		char buf[32768];
		int ret = min_t(unsigned int, len, sizeof(buf));

		ret = erofs_io_read(vin, buf, ret);
		if (ret < 0)
			return ret;
		if (ret > 0) {
			ret = erofs_io_pwrite(vout, buf, pos, ret);
			if (ret < 0)
				return ret;
			pos += ret;
		} else if (len) {
			/*
			 * Input ended early: without this check the loop
			 * would spin forever, since len can never reach 0.
			 */
			return -EIO;
		}
		len -= ret;
	} while (len);
	return 0;
}
