如何在C上recursion列出Linux上的目录?

我需要recursion列出C编程中的所有目录和文件。 我已经看过FTW,但是没有包含在我正在使用的两个操作系统(Fedora和Minix)中。 在过去的几个小时里,我读过的所有不同的东西都让我头痛不已。

如果有人知道一个代码片段,我可以看看,这将是惊人的,或者如果任何人都可以给我这个很好的方向,我将非常感激。

这是一个recursion的版本:

#include <unistd.h> #include <sys/types.h> #include <dirent.h> #include <stdio.h> #include <string.h> void listdir(const char *name, int indent) { DIR *dir; struct dirent *entry; if (!(dir = opendir(name))) return; while ((entry = readdir(dir)) != NULL) { if (entry->d_type == DT_DIR) { char path[1024]; if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) continue; snprintf(path, sizeof(path), "%s/%s", name, entry->d_name); printf("%*s[%s]\n", indent, "", entry->d_name); listdir(path, indent + 2); } else { printf("%*s- %s\n", indent, "", entry->d_name); } } closedir(dir); } int main(void) { listdir(".", 0); return 0; } 

为什么每个人都坚持一遍又一遍地重复发明?

POSIX.1-2008对nftw()函数进行了标准化,这个函数也在Single Unix Specification v4(SuSv4)中定义,并且可以在Linux(glibc, man 3 nftw ),OS X以及大多数当前的BSD变体中使用。 这并不是什么新鲜事。

opendir() / readdir() / closedir()的实现几乎从不处理在树遍历期间移动,重命名或删除目录或文件的情况,而nftw()应该nftw()处理它们。

例如,考虑下面的C程序,它列出从当前工作目录开始的目录树,或者在命令行上命名的每个目录,或者仅仅是在命令行中命名的文件:

 /* We want POSIX.1-2008 + XSI, ie SuSv4, features */ #define _XOPEN_SOURCE 700 /* Added on 2017-06-25: If the C library can support 64-bit file sizes and offsets, using the standard names, these defines tell the C library to do so. */ #define _LARGEFILE64_SOURCE #define _FILE_OFFSET_BITS 64 #include <stdlib.h> #include <unistd.h> #include <ftw.h> #include <time.h> #include <stdio.h> #include <string.h> #include <errno.h> /* POSIX.1 says each process has at least 20 file descriptors. * Three of those belong to the standard streams. * Here, we use a conservative estimate of 15 available; * assuming we use at most two for other uses in this program, * we should never run into any problems. * Most trees are shallower than that, so it is efficient. * Deeper trees are traversed fine, just a bit slower. * (Linux allows typically hundreds to thousands of open files, * so you'll probably never see any issues even if you used * a much higher value, say a couple of hundred, but * 15 is a safe, reasonable value.) */ #ifndef USE_FDS #define USE_FDS 15 #endif int print_entry(const char *filepath, const struct stat *info, const int typeflag, struct FTW *pathinfo) { /* const char *const filename = filepath + pathinfo->base; */ const double bytes = (double)info->st_size; /* Not exact if large! */ struct tm mtime; localtime_r(&(info->st_mtime), &mtime); printf("%04d-%02d-%02d %02d:%02d:%02d", mtime.tm_year+1900, mtime.tm_mon+1, mtime.tm_mday, mtime.tm_hour, mtime.tm_min, mtime.tm_sec); if (bytes >= 1099511627776.0) printf(" %9.3f TiB", bytes / 1099511627776.0); else if (bytes >= 1073741824.0) printf(" %9.3f GiB", bytes / 1073741824.0); else if (bytes >= 1048576.0) printf(" %9.3f MiB", bytes / 1048576.0); else if (bytes >= 1024.0) printf(" %9.3f KiB", bytes / 1024.0); else printf(" %9.0f B ", bytes); if (typeflag == FTW_SL) { char *target; size_t maxlen = 1023; ssize_t len; while (1) { target = malloc(maxlen + 1); if (target == NULL) return ENOMEM; len = readlink(filepath, target, maxlen); if (len == (ssize_t)-1) { const int saved_errno = errno; free(target); return saved_errno; } if (len >= (ssize_t)maxlen) { free(target); maxlen += 1024; continue; } target[len] = '\0'; break; } printf(" %s -> %s\n", filepath, target); free(target); } else if (typeflag == FTW_SLN) printf(" %s (dangling symlink)\n", filepath); else if (typeflag == FTW_F) printf(" %s\n", filepath); else if (typeflag == FTW_D || typeflag == FTW_DP) printf(" %s/\n", filepath); else if (typeflag == FTW_DNR) printf(" %s/ (unreadable)\n", filepath); else printf(" %s (unknown)\n", filepath); return 0; } int print_directory_tree(const char *const dirpath) { int result; /* Invalid directory path? */ if (dirpath == NULL || *dirpath == '\0') return errno = EINVAL; result = nftw(dirpath, print_entry, USE_FDS, FTW_PHYS); if (result >= 0) errno = result; return errno; } int main(int argc, char *argv[]) { int arg; if (argc < 2) { if (print_directory_tree(".")) { fprintf(stderr, "%s.\n", strerror(errno)); return EXIT_FAILURE; } } else { for (arg = 1; arg < argc; arg++) { if (print_directory_tree(argv[arg])) { fprintf(stderr, "%s.\n", strerror(errno)); return EXIT_FAILURE; } } } return EXIT_SUCCESS; } 

上面的大部分代码都在print_entry() 。 它的任务是打印出每个目录条目。 在print_directory_tree() ,我们告诉nftw()为它看到的每个目录条目调用它。

上面唯一的手动波形细节是关于应该让nftw()使用多less个文​​件描述符的决定。 如果您的程序在文件树行走期间最多使用两个额外的文件描述符(除了标准stream),15已知是安全的(在所有具有nftw()系统上,并且大部分是POSIX兼容的)。

在Linux中,您可以使用sysconf(_SC_OPEN_MAX)来查找打开的文件的最大数量,并且用nftw()调用减去您使用的数字,但是我不会打扰(除非我知道该实用程序主要用于病理学深度的目录结构)。 15个描述符不限制树的深度; nftw()只是变慢(如果从一个目录走过一个更深的目录,可能无法检测目录中的更改,尽pipe在系统和C库实现之间进行权衡和检测变化的一般function也不尽相同)。 只要使用像这样的编译时常量就可以保持代码的可移植性 – 它不仅适用于Linux,也适用于Mac OS X和所有当前的BSD变体以及大多数其他不太旧的Unix变体。

在一个评论中,Ruslan提到他们不得不切换到nftw64()因为他们有需要64位大小/偏移的文件系统条目,而nftw()的“正常”版本因errno == EOVERFLOW失败。 正确的解决scheme是不切换到特定于GLIBC的64位函数,而是定义_LARGEFILE64_SOURCE_FILE_OFFSET_BITS 64 。 这些告诉C库在使用标准函数( nftw()fstat()等)和types名称( off_t等)的情况下,如果可能的话,切换到64位文件大小和偏移量。

 int is_directory_we_want_to_list(const char *parent, char *name) { struct stat st_buf; if (!strcmp(".", name) || !strcmp("..", name)) return 0; char *path = alloca(strlen(name) + strlen(parent) + 2); sprintf(path, "%s/%s", parent, name); stat(path, &st_buf); return S_ISDIR(st_buf.st_mode); } int list(const char *name) { DIR *dir = opendir(name); struct dirent *ent; while (ent = readdir(dir)) { char *entry_name = ent->d_name; printf("%s\n", entry_name); if (is_directory_we_want_to_list(name, entry_name)) { // You can consider using alloca instead. char *next = malloc(strlen(name) + strlen(entry_name) + 2); sprintf(next, "%s/%s", name, entry_name); list(next); free(next); } } closedir(dir); } 

在这个上下文中值得浏览的头文件是: stat.h , dirent.h 。 请记住,上面的代码不检查可能发生的任何错误。

在ftw.h中定义了一个完全不同的方法。

正如我在评论中提到的那样,我认为recursion方法在这个任务中有两个固有的缺陷。

第一个缺陷是打开文件的限制。 此限制对深度遍历施加了限制。 如果有足够的子文件夹,则recursion方法将会中断。 ( 请参阅关于堆栈溢出的编辑

第二个缺陷更微妙一点。 recursion方法使得testing硬链接非常困难。 如果一个文件夹树是循环的(由于硬链接),recursion方法将中断(希望没有堆栈溢出)。 ( 请参阅关于硬链接的编辑

但是,通过用单个文件描述符和链接列表replacerecursion来避免这些问题是非常简单的。

我认为这不是一个学校项目,recursion是可选的。

这是一个示例应用程序。

使用a.out ./查看文件夹树。

我为macros和东西道歉…我通常使用内联函数,但我认为如果它是在一个单一的function,它将更容易遵循的代码。

 #include <dirent.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> int main(int argc, char const *argv[]) { /* print use instruction unless a folder name was given */ if (argc < 2) fprintf(stderr, "\nuse:\n" " %s <directory>\n" "for example:\n" " %s ./\n\n", argv[0], argv[0]), exit(0); /*************** a small linked list macro implementation ***************/ typedef struct list_s { struct list_s *next; struct list_s *prev; } list_s; #define LIST_INIT(name) \ { .next = &name, .prev = &name } #define LIST_PUSH(dest, node) \ do { \ (node)->next = (dest)->next; \ (node)->prev = (dest); \ (node)->next->prev = (node); \ (dest)->next = (node); \ } while (0); #define LIST_POP(list, var) \ if ((list)->next == (list)) { \ var = NULL; \ } else { \ var = (list)->next; \ (list)->next = var->next; \ var->next->prev = var->prev; \ } /*************** a record (file / folder) item type ***************/ typedef struct record_s { /* this is a flat processing queue. */ list_s queue; /* this will list all queued and processed folders (cyclic protection) */ list_s folders; /* this will list all the completed items (siblings and such) */ list_s list; /* unique ID */ ino_t ino; /* name length */ size_t len; /* name string */ char name[]; } record_s; /* take a list_s pointer and convert it to the record_s pointer */ #define NODE2RECORD(node, list_name) \ ((record_s *)(((uintptr_t)(node)) - \ ((uintptr_t) & ((record_s *)0)->list_name))) /* initializes a new record */ #define RECORD_INIT(name) \ (record_s){.queue = LIST_INIT((name).queue), \ .folders = LIST_INIT((name).folders), \ .list = LIST_INIT((name).list)} /*************** the actual code ***************/ record_s records = RECORD_INIT(records); record_s *pos, *item; list_s *tmp; DIR *dir; struct dirent *entry; /* initialize the root folder record and add it to the queue */ pos = malloc(sizeof(*pos) + strlen(argv[1]) + 2); *pos = RECORD_INIT(*pos); pos->len = strlen(argv[1]); memcpy(pos->name, argv[1], pos->len); if (pos->name[pos->len - 1] != '/') pos->name[pos->len++] = '/'; pos->name[pos->len] = 0; /* push to queue, but also push to list (first item processed) */ LIST_PUSH(&records.queue, &pos->queue); LIST_PUSH(&records.list, &pos->list); /* as long as the queue has items to be processed, do so */ while (records.queue.next != &records.queue) { /* pop queued item */ LIST_POP(&records.queue, tmp); /* collect record to process */ pos = NODE2RECORD(tmp, queue); /* add record to the processed folder list */ LIST_PUSH(&records.folders, &pos->folders); /* process the folder and add all folder data to current list */ dir = opendir(pos->name); if (!dir) continue; while ((entry = readdir(dir)) != NULL) { /* create new item, copying it's path data and unique ID */ item = malloc(sizeof(*item) + pos->len + entry->d_namlen + 2); *item = RECORD_INIT(*item); item->len = pos->len + entry->d_namlen; memcpy(item->name, pos->name, pos->len); memcpy(item->name + pos->len, entry->d_name, entry->d_namlen); item->name[item->len] = 0; item->ino = entry->d_ino; /* add item to the list, right after the `pos` item */ LIST_PUSH(&pos->list, &item->list); /* unless it's a folder, we're done. */ if (entry->d_type != DT_DIR) continue; /* test for '.' and '..' */ if (entry->d_name[0] == '.' && (entry->d_name[1] == 0 || (entry->d_name[1] == '.' && entry->d_name[2] == 0))) continue; /* add folder marker */ item->name[item->len++] = '/'; item->name[item->len] = 0; /* test for cyclic processing */ list_s *t = records.folders.next; while (t != &records.folders) { if (NODE2RECORD(t, folders)->ino == item->ino) { /* we already processed this folder! */ break; /* this breaks from the small loop... */ } t = t->next; } if (t != &records.folders) continue; /* if we broke from the small loop, entry is done */ /* item is a new folder, add to queue */ LIST_PUSH(&records.queue, &item->queue); } closedir(dir); } /*************** Printing the results and cleaning up ***************/ while (records.list.next != &records.list) { /* pop list item */ LIST_POP(&records.list, tmp); /* collect record to process */ pos = NODE2RECORD(tmp, list); /* prepare for next iteration */ LIST_POP(&records.list, tmp); fwrite(pos->name, pos->len, 1, stderr); fwrite("\n", 1, 1, stderr); free(pos); } return 0; } 

编辑

@Stargateur在评论中提到,recursion代码在达到打开文件限制之前可能会溢出堆栈。

虽然我没有看到堆栈溢出如何更好,但是只要进程在调用时不接近文件限制,这个评估就可能是正确的。

@Stargateur在评论中提到的另一点是,recursion代码的深度受限于最大数量的子目录(ext4文件系统上的64000),并且硬链接极不可能(因为到文件夹的硬链接不是允许在Linux / Unix上)。

如果代码在Linux上运行(这是根据问题),这是一个好消息,所以这个问题不是真正的问题(除非在macOS或Windows上运行代码)…尽pipe64K子文件夹在recursion中可能会打开堆栈。

话虽如此,没有recursion选项仍然有优势,如能够轻松地增加处理的项目数量的限制,以及能够caching结果。

PS

根据评论,这是一个非recursion版本的代码,不检查循环层次结构。 速度更快,应该足够安全,可以在不允许硬链接到文件夹的Linux机器上使用。

 #include <dirent.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> int main(int argc, char const *argv[]) { /* print use instruction unless a folder name was given */ if (argc < 2) fprintf(stderr, "\nuse:\n" " %s <directory>\n" "for example:\n" " %s ./\n\n", argv[0], argv[0]), exit(0); /*************** a small linked list macro implementation ***************/ typedef struct list_s { struct list_s *next; struct list_s *prev; } list_s; #define LIST_INIT(name) \ { .next = &name, .prev = &name } #define LIST_PUSH(dest, node) \ do { \ (node)->next = (dest)->next; \ (node)->prev = (dest); \ (node)->next->prev = (node); \ (dest)->next = (node); \ } while (0); #define LIST_POP(list, var) \ if ((list)->next == (list)) { \ var = NULL; \ } else { \ var = (list)->next; \ (list)->next = var->next; \ var->next->prev = var->prev; \ } /*************** a record (file / folder) item type ***************/ typedef struct record_s { /* this is a flat processing queue. */ list_s queue; /* this will list all the completed items (siblings and such) */ list_s list; /* unique ID */ ino_t ino; /* name length */ size_t len; /* name string */ char name[]; } record_s; /* take a list_s pointer and convert it to the record_s pointer */ #define NODE2RECORD(node, list_name) \ ((record_s *)(((uintptr_t)(node)) - \ ((uintptr_t) & ((record_s *)0)->list_name))) /* initializes a new record */ #define RECORD_INIT(name) \ (record_s){.queue = LIST_INIT((name).queue), .list = LIST_INIT((name).list)} /*************** the actual code ***************/ record_s records = RECORD_INIT(records); record_s *pos, *item; list_s *tmp; DIR *dir; struct dirent *entry; /* initialize the root folder record and add it to the queue */ pos = malloc(sizeof(*pos) + strlen(argv[1]) + 2); *pos = RECORD_INIT(*pos); pos->len = strlen(argv[1]); memcpy(pos->name, argv[1], pos->len); if (pos->name[pos->len - 1] != '/') pos->name[pos->len++] = '/'; pos->name[pos->len] = 0; /* push to queue, but also push to list (first item processed) */ LIST_PUSH(&records.queue, &pos->queue); LIST_PUSH(&records.list, &pos->list); /* as long as the queue has items to be processed, do so */ while (records.queue.next != &records.queue) { /* pop queued item */ LIST_POP(&records.queue, tmp); /* collect record to process */ pos = NODE2RECORD(tmp, queue); /* process the folder and add all folder data to current list */ dir = opendir(pos->name); if (!dir) continue; while ((entry = readdir(dir)) != NULL) { /* create new item, copying it's path data and unique ID */ item = malloc(sizeof(*item) + pos->len + entry->d_namlen + 2); *item = RECORD_INIT(*item); item->len = pos->len + entry->d_namlen; memcpy(item->name, pos->name, pos->len); memcpy(item->name + pos->len, entry->d_name, entry->d_namlen); item->name[item->len] = 0; item->ino = entry->d_ino; /* add item to the list, right after the `pos` item */ LIST_PUSH(&pos->list, &item->list); /* unless it's a folder, we're done. */ if (entry->d_type != DT_DIR) continue; /* test for '.' and '..' */ if (entry->d_name[0] == '.' && (entry->d_name[1] == 0 || (entry->d_name[1] == '.' && entry->d_name[2] == 0))) continue; /* add folder marker */ item->name[item->len++] = '/'; item->name[item->len] = 0; /* item is a new folder, add to queue */ LIST_PUSH(&records.queue, &item->queue); } closedir(dir); } /*************** Printing the results and cleaning up ***************/ while (records.list.next != &records.list) { /* pop list item */ LIST_POP(&records.list, tmp); /* collect record to process */ pos = NODE2RECORD(tmp, list); /* prepare for next iteration */ LIST_POP(&records.list, tmp); fwrite(pos->name, pos->len, 1, stderr); fwrite("\n", 1, 1, stderr); free(pos); } return 0; } 

这是一个使用recursion的简化版本,但使用的堆栈空间less得多:

 #include <errno.h> #include <stdio.h> #include <string.h> #include <sys/types.h> #include <unistd.h> #include <dirent.h> void listdir(char *path, size_t size) { DIR *dir; struct dirent *entry; size_t len = strlen(path); if (!(dir = opendir(path))) { fprintf(stderr, "path not found: %s: %s\n", path, strerror(errno)); return; } puts(path); while ((entry = readdir(dir)) != NULL) { char *name = entry->d_name; if (entry->d_type == DT_DIR) { if (!strcmp(name, ".") || !strcmp(name, "..")) continue; if (len + strlen(name) + 2 > size) { fprintf(stderr, "path too long: %s/%s\n", path, name); } else { path[len] = '/'; strcpy(path + len + 1, name); listdir(path, size); path[len] = '\0'; } } else { printf("%s/%s\n", path, name); } } closedir(dir); } int main(void) { char path[1024] = "."; listdir(path, sizeof path); return 0; } 

在我的系统上,它的输出和find .相同find .