前置知识
_IO_FILE_plus 结构体
struct _IO_FILE_plus{ _IO_FILE file; const struct _IO_jump_t *vtable;};_IO_FILE结构体
先说_IO_FILE结构体。该结构体是标准IO库中用来描述文件流的结构:程序执行fopen函数时会创建该结构,并通过malloc分配在堆中(见下文的__fopen_internal)。
struct _IO_FILE { int _flags; /* High-order word is _IO_MAGIC; rest is flags. */#define _IO_file_flags _flags
/* The following pointers correspond to the C++ streambuf protocol. */ /* Note: Tk uses the _IO_read_ptr and _IO_read_end fields directly. */ char* _IO_read_ptr; /* Current read pointer */ char* _IO_read_end; /* End of get area. */ char* _IO_read_base; /* Start of putback+get area. */ char* _IO_write_base; /* Start of put area. */ char* _IO_write_ptr; /* Current put pointer. */ char* _IO_write_end; /* End of put area. */ char* _IO_buf_base; /* Start of reserve area. */ char* _IO_buf_end; /* End of reserve area. */ /* The following fields are used to support backing up and undo. */ char *_IO_save_base; /* Pointer to start of non-current get area. */ char *_IO_backup_base; /* Pointer to first valid character of backup area */ char *_IO_save_end; /* Pointer to end of non-current get area. */
struct _IO_marker *_markers;
struct _IO_FILE *_chain;
int _fileno;#if 0 int _blksize;#else int _flags2;#endif _IO_off_t _old_offset; /* This used to be _offset but it's too small. */
#define __HAVE_COLUMN /* temporary */ /* 1+column number of pbase(); 0 is unknown. */ unsigned short _cur_column; signed char _vtable_offset; char _shortbuf[1];
/* char* _save_gptr; char* _save_egptr; */
_IO_lock_t *_lock;#ifdef _IO_USE_OLD_IO_FILE};调试源码
#include <stdio.h>#include <stdlib.h>
int main() { FILE *file = fopen("example.txt", "r"); if (file == NULL) { perror("Error opening file"); return EXIT_FAILURE; }
fclose(file); return EXIT_SUCCESS;}总体概述


源码解析
1. 在fopen处下断点
fopen实际上进入的是_IO_new_fopen函数
FILE *_IO_new_fopen (const char *filename, const char *mode){ return __fopen_internal (filename, mode, 1);}
strong_alias (_IO_new_fopen, __new_fopen)versioned_symbol (libc, _IO_new_fopen, _IO_fopen, GLIBC_2_1);versioned_symbol (libc, __new_fopen, fopen, GLIBC_2_1);
# if !defined O_LARGEFILE || O_LARGEFILE == 0 weak_alias (_IO_new_fopen, _IO_fopen64) weak_alias (_IO_new_fopen, fopen64)# endif
2. _IO_new_fopen函数进入了__fopen_internal。这个函数是fopen的核心实现:先用malloc申请一个locked_FILE结构体,然后依次调用_IO_no_init、_IO_new_file_init_internal和_IO_file_fopen完成初始化并打开文件
FILE *__fopen_internal (const char *filename, const char *mode, int is32){ struct locked_FILE { struct _IO_FILE_plus fp;#ifdef _IO_MTSAFE_IO _IO_lock_t lock;#endif struct _IO_wide_data wd; } *new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE));
if (new_f == NULL) return NULL;#ifdef _IO_MTSAFE_IO new_f->fp.file._lock = &new_f->lock;#endif _IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd, &_IO_wfile_jumps); _IO_JUMPS (&new_f->fp) = &_IO_file_jumps; _IO_new_file_init_internal (&new_f->fp); if (_IO_file_fopen ((FILE *) new_f, filename, mode, is32) != NULL) return __fopen_maybe_mmap (&new_f->fp.file);
_IO_un_link (&new_f->fp); free (new_f); return NULL;}pwndbg> ptype struct locked_FILEtype = struct locked_FILE { struct _IO_FILE_plus fp; _IO_lock_t lock; struct _IO_wide_data wd;}- __fopen_internal进入_IO_no_init
可以发现这个函数是在初始化FILE结构体,分为两个部分:_IO_old_init和wide_data的初始化
void_IO_no_init (FILE *fp, int flags, int orientation,struct _IO_wide_data *wd, const struct _IO_jump_t *jmp){ _IO_old_init (fp, flags); fp->_mode = orientation; if (orientation >= 0){
fp->_wide_data = wd; fp->_wide_data->_IO_buf_base = NULL; fp->_wide_data->_IO_buf_end = NULL; fp->_wide_data->_IO_read_base = NULL; fp->_wide_data->_IO_read_ptr = NULL; fp->_wide_data->_IO_read_end = NULL; fp->_wide_data->_IO_write_base = NULL; fp->_wide_data->_IO_write_ptr = NULL; fp->_wide_data->_IO_write_end = NULL; fp->_wide_data->_IO_save_base = NULL; fp->_wide_data->_IO_backup_base = NULL; fp->_wide_data->_IO_save_end = NULL;
fp->_wide_data->_wide_vtable = jmp;
}
else /* Cause predictable crash when a wide function is called on a byte stream. */ fp->_wide_data = (struct _IO_wide_data *) -1L; fp->_freeres_list = NULL; fp->_total_written = 0;}
初始化之前
4._IO_no_init 进入_IO_old_init
void_IO_old_init (FILE *fp, int flags){ fp->_flags = _IO_MAGIC|flags; /*_IO_MAGIC是魔数标志位,表明这个FILE对象是有效的*/ fp->_flags2 = 0; if (stdio_needs_locking) fp->_flags2 |= _IO_FLAGS2_NEED_LOCK; fp->_IO_buf_base = NULL; fp->_IO_buf_end = NULL; fp->_IO_read_base = NULL; fp->_IO_read_ptr = NULL; fp->_IO_read_end = NULL; fp->_IO_write_base = NULL; fp->_IO_write_ptr = NULL; fp->_IO_write_end = NULL; fp->_chain = NULL; /* Not necessary. */
fp->_IO_save_base = NULL; fp->_IO_backup_base = NULL; fp->_IO_save_end = NULL; fp->_markers = NULL; fp->_cur_column = 0; #if _IO_JUMPS_OFFSET fp->_vtable_offset = 0;#endif#ifdef _IO_MTSAFE_IO if (fp->_lock != NULL) _IO_lock_init (*fp->_lock);#endif}经过_IO_old_init初始化后
5. 设置_mode,这个字段代表的是流的字符方向/宽窄方向
void 587 _IO_no_init (FILE *fp, int flags, int orientation, 588 struct _IO_wide_data *wd, const struct _IO_jump_t *jmp) 589 { 590 _IO_old_init (fp, flags); ► 591 fp->_mode = orientation;`fp->_mode` 不是打开模式 `"r" / "w" / "a"`,而是 **流的字符方向/宽窄方向**:
- `0`:还没定向,未决定是字节流还是宽字符流
- `> 0`:宽字符流 wide-oriented
- `< 0`:字节流 byte-oriented

本次fopen调用中__fopen_internal传给_IO_no_init的orientation为0,所以此时流尚未定向。
6. 初始化宽字符数据_wide_data
7. 返回__fopen_internal
8. 执行_IO_JUMPS (&new_f->fp) = &_IO_file_jumps;
#define _IO_JUMPS(THIS) (THIS)->vtable也就是
new_f->fp.vtable = &_IO_file_jumps;执行之前
pwndbg> p *new_f$5 = { fp = { file = { _flags = -72548352, _IO_read_ptr = 0x0, _IO_read_end = 0x0, _IO_read_base = 0x0, _IO_write_base = 0x0, _IO_write_ptr = 0x0, _IO_write_end = 0x0, _IO_buf_base = 0x0, _IO_buf_end = 0x0, _IO_save_base = 0x0, _IO_backup_base = 0x0, _IO_save_end = 0x0, _markers = 0x0, _chain = 0x0, _fileno = 0, _flags2 = 0, _short_backupbuf = "", _old_offset = 0, _cur_column = 0, _vtable_offset = 0 '\000', _shortbuf = "", _lock = 0x5555555590f0, _offset = 0, _codecvt = 0x0, _wide_data = 0x555555559100, _freeres_list = 0x0, _freeres_buf = 0x0, _prevchain = 0x0, _mode = 0, _unused3 = 0, _total_written = 0, _unused2 = "\000\000\000\000\000\000\000" }, vtable = 0x0 /*这里还没有*/ }, lock = { lock = 0, cnt = 0, owner = 0x0 }, wd = { _IO_read_ptr = 0x0, _IO_read_end = 0x0, _IO_read_base = 0x0, _IO_write_base = 0x0, _IO_write_ptr = 0x0, _IO_write_end = 0x0, _IO_buf_base = 0x0, _IO_buf_end = 0x0, _IO_save_base = 0x0, _IO_backup_base = 0x0, _IO_save_end = 0x0, _IO_state = { __count = 0, __value = { __wch = 0, __wchb = "\000\000\000" } }, _IO_last_state = { __count = 0, __value = { __wch = 0, __wchb = "\000\000\000" } }, _codecvt = { __cd_in = { step = 0x0, step_data = { __outbuf = 0x0, __outbufend = 0x0, __flags = 0, __invocation_counter = 0, __internal_use = 0, __statep = 0x0, __state = { __count = 0, __value = { __wch = 0, __wchb = "\000\000\000" } } } }, __cd_out = { step = 0x0, step_data = { __outbuf = 0x0, __outbufend = 0x0, __flags = 0, __invocation_counter = 0, __internal_use = 0, __statep = 0x0, __state = { __count = 0, __value = { __wch = 0, __wchb = "\000\000\000" } } } } }, _shortbuf = L"", _wide_vtable = 0x7ffff7f7c228 <_IO_wfile_jumps> }}执行后
fp = { file = { _flags = -72548352, _IO_read_ptr = 0x0, _IO_read_end = 0x0, _IO_read_base = 0x0, _IO_write_base = 0x0, _IO_write_ptr = 0x0, _IO_write_end = 0x0, _IO_buf_base = 0x0, _IO_buf_end = 0x0, _IO_save_base = 0x0, _IO_backup_base = 0x0, _IO_save_end = 0x0, _markers = 0x0, _chain = 0x0, _fileno = 0, _flags2 = 0, _short_backupbuf = "", _old_offset = 0, _cur_column = 0, _vtable_offset = 0 '\000', _shortbuf = "", _lock = 0x5555555590f0, _offset = 0, _codecvt = 0x0, _wide_data = 0x555555559100, _freeres_list = 0x0, _freeres_buf = 0x0, _prevchain = 0x0, _mode = 0, _unused3 = 0, _total_written = 0, _unused2 = "\000\000\000\000\000\000\000" }, vtable = 0x7ffff7f7c030 <_IO_file_jumps> }, ... ...}- 进入_IO_new_file_init_internal
void_IO_new_file_init_internal (struct _IO_FILE_plus *fp){ /* POSIX.1 allows another file handle to be used to change the position of our file descriptor. Hence we actually don't know the actual position before we do the first fseek (and until a following fflush). */ fp->file._offset = _IO_pos_BAD; fp->file._flags |= CLOSED_FILEBUF_FLAGS; _IO_link_in (fp); fp->file._fileno = -1;}- 进入 _IO_link_in 主要是把新的_IO_FILE_plus结构体加入_IO_list_all
void_IO_link_in (struct _IO_FILE_plus *fp){ if ((fp->file._flags & _IO_LINKED) == 0) { fp->file._flags |= _IO_LINKED;#ifdef _IO_MTSAFE_IO _IO_cleanup_region_start_noarg (flush_cleanup); _IO_lock_lock (list_all_lock); run_fp = (FILE *) fp; _IO_flockfile ((FILE *) fp);#endif fp->file._chain = (FILE *) _IO_list_all; if (_IO_vtable_offset ((FILE *) fp) == 0) { fp->file._prevchain = (FILE **) &_IO_list_all; if (_IO_list_all != NULL) _IO_list_all->file._prevchain = &fp->file._chain; } _IO_list_all = fp;#ifdef _IO_MTSAFE_IO _IO_funlockfile ((FILE *) fp); run_fp = NULL; _IO_lock_unlock (list_all_lock); _IO_cleanup_region_end (0);#endif } }libc_hidden_def (_IO_link_in)- 执行_IO_file_fopen
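这个函数主要是根据mode字符串设置各种标志位:打开标志omode/oflags、读写标志read_write以及fp->_flags2,然后调用_IO_file_open真正打开文件;如果mode中带有",ccs=",还会加载字符集转换并把流设置为宽字符方向(_mode = 1)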
FILE *_IO_new_file_fopen (FILE *fp, const char *filename, const char *mode,int is32not64){ int oflags = 0, omode; int read_write; int oprot = 0666; int i; FILE *result; const char *cs; const char *last_recognized;
if (_IO_file_is_open (fp)) return NULL; switch (*mode) { case 'r': omode = O_RDONLY; read_write = _IO_NO_WRITES; break; case 'w': omode = O_WRONLY; oflags = O_CREAT|O_TRUNC; read_write = _IO_NO_READS; break; case 'a': omode = O_WRONLY; oflags = O_CREAT|O_APPEND; read_write = _IO_NO_READS|_IO_IS_APPENDING; break; default: __set_errno (EINVAL); return NULL; }last_recognized = mode;for (i = 1; i < 7; ++i){ switch (*++mode) { case '\0': case ',': break; case '+': omode = O_RDWR; read_write &= _IO_IS_APPENDING; last_recognized = mode; continue; case 'x': oflags |= O_EXCL; last_recognized = mode; continue; case 'b': last_recognized = mode; continue; case 'm': fp->_flags2 |= _IO_FLAGS2_MMAP; continue; case 'c': fp->_flags2 |= _IO_FLAGS2_NOTCANCEL; continue; case 'e': oflags |= O_CLOEXEC; fp->_flags2 |= _IO_FLAGS2_CLOEXEC; continue; default: /* Ignore. */ continue; } break; }
result = _IO_file_open (fp, filename, omode|oflags, oprot, read_write,is32not64);
if (result != NULL) { /* Test whether the mode string specifies the conversion. */ cs = strstr (last_recognized + 1, ",ccs="); if (cs != NULL) {
/* Yep. Load the appropriate conversions and set the orientation to wide. */ struct gconv_fcts fcts; struct _IO_codecvt *cc; char *endp = __strchrnul (cs + 5, ','); char *ccs = malloc (endp - (cs + 5) + 3);
if (ccs == NULL) { int malloc_err = errno; /* Whatever malloc failed with. */ (void) _IO_file_close_it (fp); __set_errno (malloc_err); return NULL; }
*((char *) __mempcpy (ccs, cs + 5, endp - (cs + 5))) = '\0'; strip (ccs, ccs);
if (__wcsmbs_named_conv (&fcts, ccs[2] == '\0' ? upstr (ccs, cs + 5) : ccs) != 0) { /* Something went wrong, we cannot load the conversion modules. This means we cannot proceed since the user explicitly asked for these. */ (void) _IO_file_close_it (fp); free (ccs); __set_errno (EINVAL); return NULL; }
free (ccs); assert (fcts.towc_nsteps == 1); assert (fcts.tomb_nsteps == 1);
fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end; fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;
/* Clear the state. We start all over again. */ memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t)); memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t));
cc = fp->_codecvt = &fp->_wide_data->_codecvt; cc->__cd_in.step = fcts.towc;
cc->__cd_in.step_data.__invocation_counter = 0; cc->__cd_in.step_data.__internal_use = 1; cc->__cd_in.step_data.__flags = __GCONV_IS_LAST; cc->__cd_in.step_data.__statep = &result->_wide_data->_IO_state;
cc->__cd_out.step = fcts.tomb;
cc->__cd_out.step_data.__invocation_counter = 0; cc->__cd_out.step_data.__internal_use = 1; cc->__cd_out.step_data.__flags = __GCONV_IS_LAST | __GCONV_TRANSLIT; cc->__cd_out.step_data.__statep = &result->_wide_data->_IO_state;
/* From now on use the wide character callback functions. */ _IO_JUMPS_FILE_plus (fp) = fp->_wide_data->_wide_vtable;
/* Set the mode now. */ result->_mode = 1; }
} return result;}libc_hidden_ver (_IO_new_file_fopen, _IO_file_fopen)
- _IO_file_fopen 进入 _IO_file_open

FILE *_IO_file_open (FILE *fp, const char *filename, int posix_mode, int prot,int read_write, int is32not64){ int fdesc; if (__glibc_unlikely (fp->_flags2 & _IO_FLAGS2_NOTCANCEL)) fdesc = __open_nocancel (filename, posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot); else fdesc = __open (filename, posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot); if (fdesc < 0) return NULL; fp->_fileno = fdesc; /*文件标识符fd保存在此*/ _IO_mask_flags (fp, read_write,_IO_NO_READS+_IO_NO_WRITES+_IO_IS_APPENDING); /* For append mode, send the file offset to the end of the file. Don't update the offset cache though, since the file handle is not active. */ if ((read_write & (_IO_IS_APPENDING | _IO_NO_READS)) == (_IO_IS_APPENDING | _IO_NO_READS)) { off64_t new_pos = _IO_SYSSEEK (fp, 0, _IO_seek_end); if (new_pos == _IO_pos_BAD && errno != ESPIPE) { __close_nocancel (fdesc); return NULL; } } _IO_link_in ((struct _IO_FILE_plus *) fp); return fp;}libc_hidden_def (_IO_file_open)#define _IO_mask_flags(fp, f, mask) \ ((fp)->_flags = ((fp)->_flags & ~(mask)) | ((f) & (mask)))等价于
fp->_flags = (fp->_flags & ~(_IO_NO_READS|_IO_NO_WRITES|_IO_IS_APPENDING)) | (read_write & (_IO_NO_READS|_IO_NO_WRITES|_IO_IS_APPENDING));实际效果
| 模式 | read_write 值 | 结果 |
|---|---|---|
| "r" | _IO_NO_WRITES | 可读,不可写 |
| "w" | _IO_NO_READS | 不可读,可写 |
| "a" | _IO_NO_READS\|_IO_IS_APPENDING | 不可读,可写,追加 |
| "r+" | 0 | 可读可写 |
| "w+" | 0 | 可读可写 |
| "a+" | _IO_IS_APPENDING | 可读可写,追加 |

11. _IO_file_open进入open64
内部调用了openat
12. 返回_IO_file_open
13. _IO_file_open进入_IO_link_in,再次确保FILE对象已经链入_IO_list_all
14. _IO_link_in返回
15. _IO_file_open返回__fopen_internal
16. 如果打开成功,直接返回__fopen_maybe_mmap的结果;如果失败,先用_IO_un_link让fp脱离_IO_list_all,再free掉new_f并返回NULL
17. fopen执行完成
Thanks for reading!
