selph
selph
发布于 2024-02-29 / 32 阅读
0
0

[libc 2.35 源码学习] IO_FILE 篇 - fwrite

简介&前言

这是IO_FILE篇的第三个函数分析:fwrite,本文分析了fwrite的主要流程,fwrite如何写入文件,以及其中各缓冲区的使用:

这里主要涉及的几个缓冲区指针:

// Put (write) area pointers of the narrow-character stream. fwrite works on
// bytes, so these are char pointers; the wchar_t variants belong to the
// separate wide-character data and are not the ones used by the code below.
  char *_IO_write_base;	/* Start of put area. */
  char *_IO_write_ptr;	/* Current put pointer. */
  char *_IO_write_end;	/* End of put area. */
// Reserve (buf) area: the underlying buffer the put area is carved out of.
  char *_IO_buf_base;	/* Start of reserve area. */
  char *_IO_buf_end;		/* End of reserve area. */

流程简单概括一下就是:

  1. 如果写缓冲区还有剩余空间,先把数据尽量填入缓冲区;如果填完之后仍有数据未写入,或者需要刷新(行缓冲模式下待写数据中含有换行符),就调用_IO_OVERFLOW进行第一次刷新缓冲区操作

  2. 如果要写入的内容超过一个页大小,就先把整页的内容用系统调用写入

  3. 剩下的内容继续写入缓冲区:

    1. 写满了就直接刷新缓冲区再继续写
    2. 没写满就等待下一次刷新的时候写入文件

使用上一节使用的测试代码,稍微改了一下(改了写入内容的大小):

#include <stdio.h>
#include <stdlib.h> /* system() */

/* Destination for fread(); zero-initialised. */
char buffer[100] = {0};
/* Payload for fwrite(): 0x1234 bytes, only the first one is 'a', the rest are 0. */
char strWrite[0x1234] = {'a'};

/*
 * Demo driver for stepping through fwrite in a debugger:
 * opens ./test.txt in "w+" mode, performs one fread followed by one
 * fwrite of sizeof(strWrite) bytes, closes the stream and dumps the file.
 * Returns 0 on success, 1 if the file cannot be opened or the write is short.
 */
int main(void){
        FILE* fp = fopen("./test.txt","w+");
        if (fp == NULL) {
                /* Without this check a failed open would hand NULL to fread/fwrite. */
                perror("fopen");
                return 1;
        }
        size_t bytesRead = fread(buffer,sizeof(char),sizeof(buffer),fp);
        /* Print exactly the bytes that were read; buffer need not be NUL-terminated. */
        printf("%.*s",(int)bytesRead,buffer);
        size_t itemsWritten = fwrite(strWrite,sizeof(char),sizeof(strWrite),fp);
        if (itemsWritten != sizeof(strWrite)) {
                perror("fwrite");
                fclose(fp);
                return 1;
        }
        fclose(fp);
        system("cat ./test.txt");
        return 0;
}

源码分析

_IO_fwrite

首先进入的是_IO_fwrite:位于 libio/iofwrite.c

/* Write COUNT items of SIZE bytes each from BUF to the stream FP.
   The total byte count (size * count) is handed to _IO_sputn while the
   stream lock is held.  Returns COUNT when everything was accepted (or
   when _IO_sputn reported EOF, see the comment below), otherwise the
   number of complete items written.  Returns 0 when the request is empty.  */
size_t
_IO_fwrite (const void *buf, size_t size, size_t count, FILE *fp)
{
  size_t request = size * count;
  size_t written = 0;
  CHECK_FILE (fp, 0);
  if (request == 0)
    return 0;
  _IO_acquire_lock (fp);
  // Only call _IO_sputn when the vtable offset is non-zero or
  // _IO_fwide (fp, -1) returns -1; otherwise `written` stays 0.
  if (_IO_vtable_offset (fp) != 0 || _IO_fwide (fp, -1) == -1)
    written = _IO_sputn (fp, (const char *) buf, request);
  _IO_release_lock (fp);
  /* We have written all of the input in case the return value indicates
     this or EOF is returned.  The latter is a special case where we
     simply did not manage to flush the buffer.  But the data is in the
     buffer and therefore written as far as fwrite is concerned.  */
  if (written == request || written == EOF)
    return count;
  else
    return written / size;
}
libc_hidden_def (_IO_fwrite)

这里检查了一下fp结构,然后调用_IO_sputn,这是个虚函数,调用的时候有vtable的检查,这里先跳过不管

进入了libio/fileops.c:_IO_new_file_xsputn

_IO_new_file_xsputn

位于 libio/fileops.c,这是fwrite的主要逻辑:

  1. 计算要写入的大小,如果正在写,判断是否需要刷新

  2. 缓冲区没满就填充缓冲区

  3. 如果还有没写入的数据或者需要刷新,就调用OVERFLOW函数刷新缓冲区

    刷新会将缓冲区的内容通过系统调用write写入文件,然后重置写入缓冲区的指针,详情见下文

  4. 接下来判断需要写入的大小是否超过页尺寸,超过了就直接用系统调用把整页的部分进行写入

  5. 剩下的内容通过_IO_default_xsputn进行进一步处理(见下文)

/* Write N bytes from DATA to stream F.
   Steps: (1) work out how much room the put area has and copy as much as
   fits; (2) if data remains, or a newline forces a flush, call
   _IO_OVERFLOW (f, EOF); (3) write the block-aligned part of the
   remainder directly through new_do_write; (4) hand the tail to
   _IO_default_xsputn.
   Returns the number of bytes consumed (n - to_do), or EOF when the flush
   failed and nothing else remained to be written.  */
size_t
_IO_new_file_xsputn(FILE *f, const void *data, size_t n)
{
    const char *s = (const char *)data; // Next byte of caller data still to be written
    size_t to_do = n;   // Number of bytes still to be written
    int must_flush = 0;
    size_t count = 0;   // Space available in the buffer; reused later for bytes actually written

    if (n <= 0)
        return 0;

    /* This is an optimized implementation.
       If the amount to be written straddles a block boundary
       (or the filebuf is unbuffered), use sys_write directly. */

    /* First figure out how much space is available in the buffer. */
    // Case 1: the stream is line-buffered and currently in put (write) mode.
    if ((f->_flags & _IO_LINE_BUF) && (f->_flags & _IO_CURRENTLY_PUTTING))
    {
        // Available space is measured up to the end of the whole buffer (_IO_buf_end).
        count = f->_IO_buf_end - f->_IO_write_ptr;
        // The whole request fits into the available space.
        if (count >= n)
        {
            const char *p;
            for (p = s + n; p > s;)
            {
                // Scan the data to be written backwards for the last newline.
                // If one is found, only copy up to and including it and
                // remember that a flush is required afterwards.
                if (*--p == '\n')
                {
                    // Number of bytes up to and including the newline
                    count = p - s + 1;
                    must_flush = 1;
                    break;
                }
            }
        }
    }
    // Case 2: otherwise, if the put area still has room, use what is left of it.
    else if (f->_IO_write_end > f->_IO_write_ptr)
        count = f->_IO_write_end - f->_IO_write_ptr; /* Space available. */

    /* Then fill the buffer. */
    if (count > 0)
    {
        // Copy at most to_do bytes: either the whole request, or as much as
        // the available space allows.
        if (count > to_do)
                count = to_do;
        f->_IO_write_ptr = __mempcpy(f->_IO_write_ptr, s, count);
        // Advance the source pointer and reduce the outstanding byte count.
        s += count;
        to_do -= count;
    }

    // Data is still outstanding, or a newline demanded a flush.
    if (to_do + must_flush > 0)
    {
        size_t block_size, do_write;
        /* Next flush the (full) buffer. */
        // Calling _IO_OVERFLOW with EOF asks it to flush without storing a character.
        if (_IO_OVERFLOW(f, EOF) == EOF)
            /* If nothing else has to be written we must not signal the
               caller that everything has been written.  */
            return to_do == 0 ? EOF : n - to_do;

        /* Try to maintain alignment: write a whole number of blocks.  */
        // Size of the stream's reserve buffer
        block_size = f->_IO_buf_end - f->_IO_buf_base;
        // Largest multiple of block_size not exceeding to_do; when the buffer
        // is smaller than 128 bytes, the whole remainder is written directly.
        do_write = to_do - (block_size >= 128 ? to_do % block_size : 0);

        if (do_write)
        {
            // Write do_write bytes straight from the caller's data to the file;
            // new_do_write also resets the put-area pointers.
            count = new_do_write(f, s, do_write);
            to_do -= count;
            // Short write: report how many bytes were consumed so far.
            if (count < do_write)
                return n - to_do;
        }

        /* Now write out the remainder.  Normally, this will fit in the
       buffer, but it's somewhat messier for line-buffered files,
       so we let _IO_default_xsputn handle the general case. */
        // The tail that was not part of the block-aligned write goes through
        // the generic buffered path.
        if (to_do)
            to_do -= _IO_default_xsputn(f, s + do_write, to_do);
    }
    return n - to_do;
}
libc_hidden_ver(_IO_new_file_xsputn, _IO_file_xsputn)

_IO_new_file_overflow

这是个虚函数,用于刷新写缓冲区,需要写入操作的时候,以及写满了的时候会被调用

/* Overflow handler for file streams: switches the stream into put mode if
   necessary (allocating the buffer when none exists), then either flushes
   the put area (CH == EOF) or stores CH into it, flushing first when the
   buffer is full.
   Returns EOF on error (errno is set to EBADF and _IO_ERR_SEEN is raised
   when the stream does not allow writes), the result of _IO_do_write when
   CH is EOF, and otherwise CH converted to unsigned char.  */
int _IO_new_file_overflow(FILE *f, int ch)
{
    if (f->_flags & _IO_NO_WRITES) /* SET ERROR */
    {
        f->_flags |= _IO_ERR_SEEN;
        __set_errno(EBADF);
        return EOF;
    }
    /* If currently reading or no buffer allocated. */
    // Enter when the stream is not yet in put mode, or when the put area
    // has never been set up (_IO_write_base is NULL).
    if ((f->_flags & _IO_CURRENTLY_PUTTING) == 0 || f->_IO_write_base == NULL)
    {
        /* Allocate a buffer if needed. */
        if (f->_IO_write_base == NULL)
        {
            _IO_doallocbuf(f);
            _IO_setg(f, f->_IO_buf_base, f->_IO_buf_base, f->_IO_buf_base);
        }
        /* Otherwise must be currently reading.
       If _IO_read_ptr (and hence also _IO_read_end) is at the buffer end,
       logically slide the buffer forwards one block (by setting the
       read pointers to all point at the beginning of the block).  This
       makes room for subsequent output.
       Otherwise, set the read pointers to _IO_read_end (leaving that
       alone, so it can continue to correspond to the external position). */

        // #define _IO_in_backup(fp) ((fp)->_flags & _IO_IN_BACKUP)
        // Rare case: the stream is currently using its backup area.
        if (__glibc_unlikely(_IO_in_backup(f)))
        {
            // Number of unread bytes left in the backup area
            size_t nbackup = f->_IO_read_end - f->_IO_read_ptr;
            _IO_free_backup_area(f);
            // Move read_base back by at most nbackup bytes, never before the
            // start of the buffer, and restart reading from there.
            f->_IO_read_base -= MIN(nbackup,
                                    f->_IO_read_base - f->_IO_buf_base);
            f->_IO_read_ptr = f->_IO_read_base;
        }
        // If the read pointer has reached the end of the buffer, rewind
        // read_ptr and read_end to the start of the buffer.
        if (f->_IO_read_ptr == f->_IO_buf_end)
            f->_IO_read_end = f->_IO_read_ptr = f->_IO_buf_base;
        // The put area becomes [read_ptr, buf_end) with the put pointer at read_ptr.
        f->_IO_write_ptr = f->_IO_read_ptr;
        f->_IO_write_base = f->_IO_write_ptr;
        f->_IO_write_end = f->_IO_buf_end;
        // Collapse the get area: read_base and read_ptr are set to read_end.
        f->_IO_read_base = f->_IO_read_ptr = f->_IO_read_end;
        // Mark the stream as currently putting (flag value 0x0800 per the original note).
        f->_flags |= _IO_CURRENTLY_PUTTING;
        // For line-buffered or unbuffered streams with _mode <= 0, leave no
        // room in the put area (write_end == write_ptr).
        if (f->_mode <= 0 && f->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED))
            f->_IO_write_end = f->_IO_write_ptr;
    }
    // CH == EOF means "flush only": write out everything between write_base and write_ptr.
    if (ch == EOF)
        return _IO_do_write(f, f->_IO_write_base, f->_IO_write_ptr - f->_IO_write_base);
    if (f->_IO_write_ptr == f->_IO_buf_end) /* Buffer is really full */
        if (_IO_do_flush(f) == EOF)
            return EOF;
    // Store the character into the put area.
    *f->_IO_write_ptr++ = ch;
    // Unbuffered streams, and line-buffered streams that just received a
    // newline, are written out immediately.
    if ((f->_flags & _IO_UNBUFFERED) || ((f->_flags & _IO_LINE_BUF) && ch == '\n'))
        if (_IO_do_write(f, f->_IO_write_base, f->_IO_write_ptr - f->_IO_write_base) == EOF)
            return EOF;
    return (unsigned char)ch;
}
libc_hidden_ver(_IO_new_file_overflow, _IO_file_overflow)

如果当前不处于写状态或者写缓冲区未初始化,就进入初始化流程(只有在写缓冲区尚未分配时才会申请内存),然后

  • 设置write缓冲区:

            f->_IO_write_ptr = f->_IO_read_ptr;
            f->_IO_write_base = f->_IO_write_ptr;
            f->_IO_write_end = f->_IO_buf_end;
    
  • 设置read缓冲区为不可用:

            f->_IO_read_base = f->_IO_read_ptr = f->_IO_read_end;
    

如果ch参数为EOF:调用_IO_do_write完成写入到文件操作

_IO_new_do_write & new_do_write:系统调用写入文件,刷新write缓冲区位置和指针

_IO_new_do_write:

/* Write TO_DO bytes from DATA to FP.
   Returns 0 when there is nothing to write or when new_do_write reports
   that exactly TO_DO bytes were written; returns EOF otherwise.  */
int _IO_new_do_write(FILE *fp, const char *data, size_t to_do)
{
    /* Nothing to write counts as success; new_do_write is not called. */
    if (to_do == 0)
        return 0;

    /* Anything other than a complete write is reported as EOF. */
    if ((size_t)new_do_write(fp, data, to_do) != to_do)
        return EOF;

    return 0;
}
libc_hidden_ver(_IO_new_do_write, _IO_do_write)

new_do_write:

/* Write TO_DO bytes from DATA to the file behind FP with _IO_SYSWRITE,
   after making the file offset consistent, then reset the get and put
   areas to the start of the buffer.
   Returns the number of bytes written as reported by _IO_SYSWRITE, or 0
   when the preparatory seek fails.  */
static size_t
new_do_write(FILE *fp, const char *data, size_t to_do)
{
    size_t count;
    // Append mode: the offset cannot be tracked, mark it as unknown.
    if (fp->_flags & _IO_IS_APPENDING)
        /* On a system without a proper O_APPEND implementation,
           you would need to sys_seek(0, SEEK_END) here, but is
           not needed nor desirable for Unix- or Posix-like systems.
           Instead, just indicate that offset (before and after) is
           unpredictable. */
        fp->_offset = _IO_pos_BAD;
    // The end of the get area does not coincide with the start of the put
    // area, so the file position has to be adjusted before writing.
    else if (fp->_IO_read_end != fp->_IO_write_base)
    {
        // Seek by (write_base - read_end) with whence 1
        // (presumably SEEK_CUR, i.e. relative to the current position).
        off64_t new_pos = _IO_SYSSEEK(fp, fp->_IO_write_base - fp->_IO_read_end, 1);
        if (new_pos == _IO_pos_BAD)
            return 0;
        fp->_offset = new_pos;
    }
    // Hand the data to the underlying write routine.
    count = _IO_SYSWRITE(fp, data, to_do);
    // Column tracking is only updated when it is active (non-zero) and
    // something was actually written.
    if (fp->_cur_column && count)
        // Recompute the current column from the bytes just written.
        fp->_cur_column = _IO_adjust_column(fp->_cur_column - 1, data, count) + 1;
    // _IO_setg is called with all three arguments equal to _IO_buf_base;
    // presumably it resets the get (read) area pointers - confirm against the macro definition.
    _IO_setg(fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);

    // Reset the put area to the start of the buffer. For line-buffered or
    // unbuffered streams with _mode <= 0 the put area is left empty
    // (write_end == buf_base); otherwise it spans the whole buffer.
    fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_buf_base;
    fp->_IO_write_end = (fp->_mode <= 0 && (fp->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED))
                             ? fp->_IO_buf_base
                             : fp->_IO_buf_end);
    // Number of bytes written
    return count;
}

计算写入到文件的大小,更新文件偏移,获取新的文件指针

使用系统调用完成一次写入

当前列不为0且写入成功,就设置新的当前列fp->_cur_column

初始化read和write缓冲区为buf缓冲区,返回写入字节数

_IO_default_xsputn

进入到这里的时候,剩下要写入的数据不足一个缓冲区块(block_size)的大小

/* Generic buffered write of N bytes from DATA to stream F.
   Repeatedly copies as much as fits into the put area; when data is
   still outstanding, the next byte is passed to _IO_OVERFLOW and the
   loop continues.
   Returns the number of bytes consumed (n - more).  */
size_t
_IO_default_xsputn(FILE *f, const void *data, size_t n)
{
	const char *s = (char *)data;
	size_t more = n;	// Bytes still to be written
	if (more <= 0)
		return 0;
	for (;;)
	{
		/* Space available. */
		// The put area still has room.
		if (f->_IO_write_ptr < f->_IO_write_end)
		{
			// Room left in the put area
			size_t count = f->_IO_write_end - f->_IO_write_ptr;
			// Never copy more than what is still outstanding.
			if (count > more)
				count = more;
			// More than 20 bytes: bulk copy.
			if (count > 20)
			{
				// Copy into the put area and advance both pointers.
				f->_IO_write_ptr = __mempcpy(f->_IO_write_ptr, s, count);
				s += count;
			}
			// 1 to 20 bytes
			else if (count)
			{
				// Copy byte by byte.
				char *p = f->_IO_write_ptr;
				ssize_t i;
				for (i = count; --i >= 0;)
					*p++ = *s++;
				f->_IO_write_ptr = p;
			}
			// Account for the bytes just copied.
			more -= count;
		}
		// Stop when everything has been written. Otherwise pass the next
		// byte to _IO_OVERFLOW (note: s is advanced here) and stop if it
		// reports EOF.
		if (more == 0 || _IO_OVERFLOW(f, (unsigned char)*s++) == EOF)
			break;
		// The byte handed to _IO_OVERFLOW has been consumed.
		more--;
	}
	return n - more;
}
libc_hidden_def(_IO_default_xsputn)

计算写缓冲区可用大小,写入请求大小,或者写满

完成一次写入之后,检查剩余要写入的大小,如果没写完,就调用_IO_OVERFLOW刷新写入缓冲区再次进行写入

总结

fwrite的流程看起来复杂,其实也很好理解,简单来说:

如果有数据需要写入或者写入的数据里有换行符

  1. 先调用_IO_OVERFLOW刷新缓冲区,完成先前内容的写入,刷新缓冲区
  2. 计算写入大小,达到整页的大小直接通过系统调用write完成写入
  3. 小于整页大小的数据,调用_IO_default_xsputn进行处理

_IO_default_xsputn是个循环:

  1. 计算剩余写入缓冲区大小,大小超过请求大小的话,就复制到缓冲区然后跳出
  2. 如果完成复制缓冲区还有需要写入的大小,就刷新缓冲区再来一遍

刷新缓冲区的操作:_IO_OVERFLOW -> _IO_new_file_overflow ->_IO_do_write ->_IO_new_do_write -> new_do_write

  1. 计算缓冲区已用大小,计算偏移,通过系统调用写入文件
  2. 初始化写缓冲区为buf缓冲区
  3. 返回写入字节

参考资料

  • [0] Glibc 2.35源码

评论