在C中读取.CSV文件

我有一个.csv文件:

lp;imie;nazwisko;ulica;numer;kod;miejscowosc;telefon;email;data_ur 1;Jan;Kowalski;ul. Nowa;1a;11-234;Budry;123-123-456;jan@go.xxx;1980.05.13 2;Jerzy;Nowak;ul. Konopnicka;13a/3;00-900;Lichowice;(55)333-44-55;jer@wu.to;1990.03.23 

我需要在C中读取。我有一些代码,但仅用于连接。

希望这能帮你入门:

可以在 http://ideone.com/l23He 上查看运行示例(使用stdin)。

 #include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Return the num-th (1-based) ';'-separated field of line.
 *
 * The line is split in place: the delimiter (or line terminator) that ends
 * the requested field is overwritten with '\0', so the returned pointer
 * refers into the caller's buffer and the buffer is modified.
 *
 * Unlike a strtok()-based split, consecutive delimiters are NOT merged: an
 * empty field ("a;;c") is returned as "" instead of shifting later columns.
 * A trailing "\n" or "\r\n" is never part of a field.
 *
 * Returns NULL when line is NULL, num < 1, or the line has fewer than num
 * fields.
 */
const char *getfield(char *line, int num)
{
    char *field = line;

    if (line == NULL || num < 1) {
        return NULL;
    }

    for (;;) {
        size_t len = strcspn(field, ";\r\n");
        char stop = field[len];

        field[len] = '\0';
        if (--num == 0) {
            return field;
        }
        if (stop != ';') {
            /* Hit the end of the record before reaching the wanted field. */
            return NULL;
        }
        field += len + 1;
    }
}

/*
 * Print the third field of every line of the file named "input".
 */
int main(void)
{
    char line[1024];
    FILE *stream = fopen("input", "r");

    if (stream == NULL) {
        perror("input");
        return EXIT_FAILURE;
    }

    while (fgets(line, sizeof line, stream)) {
        /*
         * getfield() clobbers its argument, but line is refilled by the next
         * fgets() call, so no private copy is needed here.
         */
        const char *field = getfield(line, 3);

        /* Passing NULL to %s is undefined behaviour, so substitute a marker. */
        printf("Field 3 would be %s\n", field ? field : "(missing)");
    }

    fclose(stream);
    return EXIT_SUCCESS;
}

输出:

 Field 3 would be nazwisko Field 3 would be Kowalski Field 3 would be Nowak 
 /* csv - read write comma separated value format * Copyright (c) 2003 Michael B. Allen <mba2000 ioplex.com> * * The MIT License * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ #include <stdlib.h> #include <string.h> #include <stdio.h> #include <ctype.h> #include <errno.h> #include <wchar.h> #include <wctype.h> #include "mba/msgno.h" #include "mba/csv.h" #define ST_START 1 #define ST_COLLECT 2 #define ST_TAILSPACE 3 #define ST_END_QUOTE 4 struct sinput { FILE *in; const unsigned char *src; size_t sn; size_t count; }; struct winput { const wchar_t *src; size_t sn; size_t count; }; static int snextch(struct sinput *in) { int ch; if (in->in) { if ((ch = fgetc(in->in)) == EOF) { if (ferror(in->in)) { PMNO(errno); return -1; } return 0; } } else { if (in->sn == 0) { return 0; } ch = *(in->src)++; in->sn--; } in->count++; return ch; } static int wnextch(struct winput *in) { int ch; if (in->sn == 0) { return 0; } ch = *(in->src)++; in->sn--; in->count++; return ch; } static int csv_parse_str(struct sinput *in, unsigned char *buf, size_t bn, unsigned char *row[], int rn, int sep, int flags) { int trim, quotes, ch, state, r, j, t, inquotes; trim = flags & CSV_TRIM; quotes = flags & CSV_QUOTES; state = ST_START; inquotes = 0; ch = r = j = t = 0; memset(row, 0, sizeof(unsigned char *) * rn); while (rn && bn && (ch = snextch(in)) > 0) { switch (state) { case ST_START: if (ch != '\n' && ch != sep && isspace(ch)) { if (!trim) { buf[j++] = ch; bn--; t = j; } break; } else if (quotes && ch == '"') { j = t = 0; state = ST_COLLECT; inquotes = 1; break; } state = ST_COLLECT; case ST_COLLECT: if (inquotes) { if (ch == '"') { state = ST_END_QUOTE; break; } } else if (ch == sep || ch == '\n') { row[r++] = buf; rn--; if (ch == '\n' && t && buf[t - 1] == '\r') { t--; bn++; /* crlf -> lf */ } buf[t] = '\0'; bn--; buf += t + 1; j = t = 0; state = ST_START; inquotes = 0; if (ch == '\n') { rn = 0; } break; } else if (quotes && ch == '"') { PMNF(errno = EILSEQ, ": unexpected quote in element %d", (r + 1)); return -1; } buf[j++] = ch; bn--; if (!trim || isspace(ch) == 0) { t = j; } break; case ST_TAILSPACE: case ST_END_QUOTE: if (ch == sep || ch == '\n') { row[r++] 
= buf; rn--; buf[j] = '\0'; bn--; buf += j + 1; j = t = 0; state = ST_START; inquotes = 0; if (ch == '\n') { rn = 0; } break; } else if (quotes && ch == '"' && state != ST_TAILSPACE) { buf[j++] = '"'; bn--; /* nope, just an escaped quote */ t = j; state = ST_COLLECT; break; } else if (isspace(ch)) { state = ST_TAILSPACE; break; } errno = EILSEQ; PMNF(errno, ": bad end quote in element %d", (r + 1)); return -1; } } if (ch == -1) { AMSG(""); return -1; } if (bn == 0) { PMNO(errno = E2BIG); return -1; } if (rn) { if (inquotes && state != ST_END_QUOTE) { PMNO(errno = EILSEQ); return -1; } row[r] = buf; buf[t] = '\0'; } return in->count; } static int csv_parse_wcs(struct winput *in, wchar_t *buf, size_t bn, wchar_t *row[], int rn, wint_t sep, int flags) { int trim, quotes, state, r, j, t, inquotes; wint_t ch; trim = flags & CSV_TRIM; quotes = flags & CSV_QUOTES; state = ST_START; inquotes = 0; ch = r = j = t = 0; memset(row, 0, sizeof(wchar_t *) * rn); while (rn && bn && (ch = wnextch(in)) > 0) { switch (state) { case ST_START: if (ch != L'\n' && ch != sep && iswspace(ch)) { if (!trim) { buf[j++] = ch; bn--; t = j; } break; } else if (quotes && ch == L'"') { j = t = 0; state = ST_COLLECT; inquotes = 1; break; } state = ST_COLLECT; case ST_COLLECT: if (inquotes) { if (ch == L'"') { state = ST_END_QUOTE; break; } } else if (ch == sep || ch == L'\n') { row[r++] = buf; rn--; buf[t] = L'\0'; bn--; buf += t + 1; j = t = 0; state = ST_START; inquotes = 0; if (ch == L'\n') { rn = 0; } break; } else if (quotes && ch == L'"') { PMNF(errno = EILSEQ, ": unexpected quote in element %d", (r + 1)); return -1; } buf[j++] = ch; bn--; if (!trim || iswspace(ch) == 0) { t = j; } break; case ST_TAILSPACE: case ST_END_QUOTE: if (ch == sep || ch == L'\n') { row[r++] = buf; rn--; buf[j] = L'\0'; bn--; buf += j + 1; j = t = 0; state = ST_START; inquotes = 0; if (ch == L'\n') { rn = 0; } break; } else if (quotes && ch == L'"' && state != ST_TAILSPACE) { buf[j++] = L'"'; bn--; /* nope, just an 
escaped quote */ t = j; state = ST_COLLECT; break; } else if (iswspace(ch)) { state = ST_TAILSPACE; break; } PMNF(errno = EILSEQ, ": bad end quote in element %d", (r + 1)); return -1; } } if (ch == (wint_t)-1) { AMSG(""); return -1; } if (bn == 0) { PMNO(errno = E2BIG); return -1; } if (rn) { if (inquotes && state != ST_END_QUOTE) { PMNO(errno = EILSEQ); return -1; } row[r] = buf; buf[t] = L'\0'; } return in->count; } int csv_row_parse_wcs(const wchar_t *src, size_t sn, wchar_t *buf, size_t bn, wchar_t *row[], int rn, int sep, int trim) { struct winput input; input.src = src; input.sn = sn; input.count = 0; return csv_parse_wcs(&input, buf, bn, row, rn, (wint_t)sep, trim); } int csv_row_parse_str(const unsigned char *src, size_t sn, unsigned char *buf, size_t bn, unsigned char *row[], int rn, int sep, int trim) { struct sinput input; input.in = NULL; input.src = src; input.sn = sn; input.count = 0; return csv_parse_str(&input, buf, bn, row, rn, sep, trim); } int csv_row_fread(FILE *in, unsigned char *buf, size_t bn, unsigned char *row[], int numcols, int sep, int trim) { struct sinput input; input.in = in; input.count = 0; return csv_parse_str(&input, buf, bn, row, numcols, sep, trim); } 

下面的代码是纯C语言并处理空格。 它只分配一次内存,所以每个处理过的行只需要调用一次free()。

http://ideone.com/mSCgPM

 /* Tiny CSV Reader */
/* Copyright (C) 2015, Deligiannidis Konstantinos

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>. */

#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Split line on every occurrence of the (possibly multi-character) string
 * delim and hand back a NULL-terminated array of token strings.
 *
 * One malloc() holds both the pointer array and a private copy of the text:
 *
 *   [ ptr1, ptr2, ..., ptrN, NULL, "token1\0", "token2\0", ..., "tokenN\0" ]
 *
 * so the caller releases everything with a single free(*out_storage).
 * The input line is not modified. Adjacent delimiters yield empty tokens.
 * There is no limit on the number of tokens: they are counted in a first
 * pass instead of being recorded in fixed-size index arrays.
 *
 * out_storage must point to a NULL pointer on entry (guards against
 * overwriting a live allocation).
 *
 * Returns the number of tokens (>= 1) on success, or
 *   -1  line, delim or out_storage is NULL
 *   -2  delim is the empty string
 *   -3  out of memory (or more than INT_MAX tokens)
 *   -4  *out_storage is not NULL
 */
int getcols(const char *const line, const char *const delim, char ***out_storage)
{
    const char *scan;
    const char *hit;
    char **out;
    char *text;
    char *tok;
    char *sep;
    size_t delim_size, line_size, ptr_region;
    size_t tokens = 1;
    size_t idx = 0;

    if (out_storage == NULL)
        return -1;
    if (*out_storage != NULL)
        return -4;
    if (!line || !delim)
        return -1;
    if ((delim_size = strlen(delim)) == 0)
        return -2;

    /* Pass 1: count the tokens and measure the line. */
    scan = line;
    while ((hit = strstr(scan, delim)) != NULL) {
        tokens++;
        scan = hit + delim_size;
    }
    line_size = (size_t)(scan - line) + strlen(scan);

    if (tokens > (size_t)INT_MAX)
        return -3;

    ptr_region = (tokens + 1) * sizeof(char *);
    out = malloc(ptr_region + line_size + 1);
    if (out == NULL)
        return -3;

    /* The text copy lives directly behind the pointer array. */
    text = (char *)out + ptr_region;
    memcpy(text, line, line_size + 1);

    /*
     * Pass 2: cut the copy at each delimiter. Only the first byte of a
     * delimiter is overwritten with '\0' ("a||b" -> "a\0|b"); the search
     * resumes after the whole delimiter, so the leftover bytes are never
     * seen through any token pointer.
     */
    tok = text;
    while ((sep = strstr(tok, delim)) != NULL) {
        out[idx++] = tok;
        *sep = '\0';
        tok = sep + delim_size;
    }
    out[idx++] = tok;
    out[idx] = NULL;

    *out_storage = out;
    return (int)tokens;
}

/*
 * Demo: split a string on a two-character delimiter, then split a small
 * in-memory "file" into rows and each row into columns.
 */
int main(void)
{
    char in_line[] = "Arg1;;Th;s is not Del;m;ter;;Arg3;;;;Final";
    char delim[] = ";;";
    char **columns = NULL; /* must be NULL before calling getcols() */
    char **rows = NULL;
    int cols_found;
    int i, j;

    printf("Example1:\n");
    cols_found = getcols(in_line, delim, &columns);
    if (cols_found < 0) {
        fprintf(stderr, "getcols failed: %d\n", cols_found);
        return EXIT_FAILURE;
    }
    for (i = 0; i < cols_found; i++)
        printf("Column[ %d ] = %s\n", i, columns[i]);
    free(columns);
    columns = NULL;

    printf("\n\nExample2, Nested:\n\n");
    char example_file[] = "ID;Day;Month;Year;Telephone;email;Date of registration\n"
                          "1;Sunday;january;2009;123-124-456;jitter@go.xyz;2015-05-13\n"
                          "2;Monday;March;2011;(+30)333-22-55;buffer@wl.it;2009-05-23";

    if (getcols(example_file, "\n", &rows) < 0) {
        fprintf(stderr, "getcols failed on rows\n");
        return EXIT_FAILURE;
    }
    /* The arrays are NULL-terminated, so they can be walked without a count. */
    for (i = 0; rows[i]; i++) {
        char **columnX = NULL;

        printf("Line[ %d ] = %s\n", i, rows[i]);
        if (getcols(rows[i], ";", &columnX) < 0) {
            fprintf(stderr, "getcols failed on line %d\n", i);
            free(rows);
            return EXIT_FAILURE;
        }
        for (j = 0; columnX[j]; j++)
            printf(" Col[ %d ] = %s\n", j, columnX[j]);
        free(columnX);
    }
    free(rows);
    rows = NULL;

    return 0;
}
 // C++ (not C) sketch: read the stream line by line with getline(), then split each line on ';' through a stringstream; the "..." marks where each token would be handled.
 // NOTE(review): `filenema` is presumably a typo for `filename` -- confirm before use.
 ifstream fs(filenema); string line = ""; while (getline(fs, line)) { stringstream linestream(line); string token = ""; while (getline(linestream, token, ';')) { ... } } 

一个完整的例子:它把各字段保留在原始输入缓冲区中,作为以NUL('\0')结尾的字符串,并通过一个char指针数组提供对它们的访问。该CSV处理器已确认可以处理用“双引号”括起来的字段,并忽略其中的任何分隔符。

 #include <errno.h>
#include <libgen.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// adjust BUFFER_SIZE to suit longest line
#define BUFFER_SIZE (1024 * 1024)
#define NUM_FIELDS 10
#define MAXERRS 5
#define RET_OK 0
#define RET_FAIL 1
#define FALSE 0
#define TRUE 1

// char* array will point to fields
char *pFields[NUM_FIELDS];

// field offsets into pFields array:
#define LP 0
#define IMIE 1
#define NAZWISKo 2
#define ULICA 3
#define NUMER 4
#define KOD 5
#define MIEJSCOw 6
#define TELEFON 7
#define EMAIL 8
#define DATA_UR 9

long loadFile(FILE *pFile, long *errcount);
static int loadValues(char *line, long lineno);
static char delim;

/*
 * Read pFile line by line, skip the header line and blank lines, and split
 * every other line into pFields via loadValues().
 *
 * *errcount is incremented for each line loadValues() rejects; reading stops
 * once it exceeds MAXERRS.
 *
 * Returns the number of lines read (including header and blank lines), or
 * RET_FAIL when pFile is NULL.
 */
long loadFile(FILE *pFile, long *errcount)
{
    // static storage: a 1 MiB automatic array risks overflowing the stack
    static char sInputBuf[BUFFER_SIZE];
    long lineno = 0L;

    if (pFile == NULL)
        return RET_FAIL;

    // fgets() returning NULL covers both end of file and read errors
    while (fgets(sInputBuf, sizeof sInputBuf, pFile) != NULL) {
        // skip first line (headers)
        if (++lineno == 1)
            continue;

        // jump over empty lines; fgets() keeps the line terminator, so an
        // "empty" line is one made only of CR/LF characters
        if (sInputBuf[strspn(sInputBuf, "\r\n")] == '\0')
            continue;

        // set pFields array pointers to null-terminated string fields in sInputBuf
        if (loadValues(sInputBuf, lineno) == RET_FAIL) {
            (*errcount)++;
            if (*errcount > MAXERRS)
                break;
        } else {
            // On return pFields array pointers point to loaded fields ready for load into DB or whatever
            // Fields can be accessed via pFields, eg
            printf("lp=%s, imie=%s, data_ur=%s\n", pFields[LP], pFields[IMIE], pFields[DATA_UR]);
        }
    }
    return lineno;
}

/*
 * Split line in place into exactly NUM_FIELDS fields separated by delim and
 * store a pointer to each in pFields.
 *
 * A delimiter inside double quotes does not end a field. The opening quote is
 * skipped and the closing quote is overwritten with '\0'.
 *
 * Returns RET_OK on success; RET_FAIL (after a message on stderr) when line
 * is NULL or the field count is not NUM_FIELDS.
 */
static int loadValues(char *line, long lineno)
{
    size_t len;
    char *cptr;
    int fld = 0;
    int inquote = FALSE;
    char ch;

    if (line == NULL)
        return RET_FAIL;

    // chop trailing CR/LF (eg Windows file loading in Unix env.); checking
    // len first keeps an empty string from being indexed at [-1]
    len = strlen(line);
    while (len > 0 && (line[len - 1] == '\r' || line[len - 1] == '\n'))
        line[--len] = '\0';

    cptr = line;
    pFields[fld] = cptr;
    while ((ch = *cptr) != '\0') {
        if (ch == '"') {
            if (!inquote)
                pFields[fld] = cptr + 1;
            else
                *cptr = '\0'; // zero out " and jump over it
            inquote = !inquote;
        } else if (ch == delim && !inquote) {
            // reject the extra field before touching pFields[NUM_FIELDS]
            if (fld == NUM_FIELDS - 1) {
                fprintf(stderr, "Expected field count (%d) exceeded on line %ld\n", NUM_FIELDS, lineno);
                return RET_FAIL;
            }
            *cptr = '\0'; // end of field, null terminate it
            pFields[++fld] = cptr + 1;
        }
        cptr++;
    }

    if (fld < NUM_FIELDS - 1) {
        fprintf(stderr, "Expected field count (%d) not reached on line %ld\n", NUM_FIELDS, lineno);
        return RET_FAIL;
    }
    return RET_OK;
}

/*
 * Usage: prog csvfilepath delimiter
 * Loads the file, prints selected fields of each record, and reports the
 * line and error counts. Exit status is RET_FAIL if any error occurred.
 */
int main(int argc, char **argv)
{
    FILE *fp;
    long errcount = 0L;
    long lines = 0L;

    if (argc != 3) {
        printf("Usage: %s csvfilepath delimiter\n", basename(argv[0]));
        return (RET_FAIL);
    }
    if ((delim = argv[2][0]) == '\0') {
        fprintf(stderr, "delimiter must be specified\n");
        return (RET_FAIL);
    }
    fp = fopen(argv[1], "r");
    if (fp == NULL) {
        fprintf(stderr, "Error opening file: %d\n", errno);
        return (RET_FAIL);
    }
    lines = loadFile(fp, &errcount);
    fclose(fp);
    printf("Processed %ld lines, encountered %ld error(s)\n", lines, errcount);
    if (errcount > 0)
        return (RET_FAIL);
    return (RET_OK);
}

我想分享一下这段代码。 它很简单,但很有效。 它用于解析以逗号分隔的文件。 您可以轻松修改它以适应您的需求。

 #include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Parse text as a non-negative decimal integer that fits in unsigned int.
 * Returns 0 and stores the value in *out on success, -1 on any other input
 * (empty string, sign, non-digit characters, out of range).
 */
static int parseCount(const char *text, unsigned int *out)
{
    const char *p;
    unsigned long value;

    if (text == NULL || *text == '\0')
        return -1;
    /* strtoul() would accept whitespace and a sign; insist on digits only. */
    for (p = text; *p != '\0'; p++) {
        if (!isdigit((unsigned char)*p))
            return -1;
    }

    errno = 0;
    value = strtoul(text, NULL, 10);
    if (errno == ERANGE || value > UINT_MAX)
        return -1;

    *out = (unsigned int)value;
    return 0;
}

/*
 * Echo the values of a comma-separated file, one record per output line.
 *
 *   argv[1]  path to csv file
 *   argv[2]  number of lines to skip
 *   argv[3]  length of longest value (in characters, quotes included)
 *
 * A comma inside double quotes is kept as part of the value; the quote
 * characters themselves are kept too. A newline always ends the record,
 * even inside quotes.
 *
 * Exits with EXIT_FAILURE on bad arguments, I/O or allocation failure, or
 * when a value is longer than argv[3] allows.
 */
int main(int argc, char *argv[])
{
    FILE *pfinput;
    unsigned int nSkipLines, lenLongestValue;
    unsigned int currentLine = 1;
    unsigned int vcpm = 0;  //value character marker
    int QuotationOnOff = 0; //0 - off, 1 - on
    int lineOpen = 0;       //1 while the current record has unprinted content
    int status = EXIT_FAILURE;
    char *pTempValHolder;
    int c;

    if (argc != 4
            || parseCount(argv[2], &nSkipLines) != 0
            || parseCount(argv[3], &lenLongestValue) != 0
            || lenLongestValue > SIZE_MAX - 1) {
        fprintf(stderr, "Usage: %s csvfile lines_to_skip longest_value_length\n",
                argc > 0 ? argv[0] : "csvread");
        return EXIT_FAILURE;
    }

    // one extra byte for the terminating '\0'
    pTempValHolder = malloc((size_t)lenLongestValue + 1);
    if (pTempValHolder == NULL) {
        perror("malloc");
        return EXIT_FAILURE;
    }

    pfinput = fopen(argv[1], "r");
    if (pfinput == NULL) {
        perror(argv[1]);
        free(pTempValHolder);
        return EXIT_FAILURE;
    }

    while ((c = fgetc(pfinput)) != EOF) {
        if (c == '\n') {
            if (currentLine > nSkipLines) {
                pTempValHolder[vcpm] = '\0';
                printf("%s\n", pTempValHolder);
                vcpm = 0;
            }
            currentLine++;
            lineOpen = 0;
            continue;
        }

        //ignore everything on the first nSkipLines lines
        if (currentLine <= nSkipLines)
            continue;

        lineOpen = 1;
        if (c == ',' && !QuotationOnOff) {
            pTempValHolder[vcpm] = '\0';
            printf("%s,", pTempValHolder);
            vcpm = 0;
            continue;
        }

        if (c == '\"')
            QuotationOnOff = !QuotationOnOff;

        // quotes, quoted commas and ordinary characters are all stored
        if (vcpm >= lenLongestValue) {
            fprintf(stderr, "value on line %u is longer than %u characters\n",
                    currentLine, lenLongestValue);
            goto done;
        }
        pTempValHolder[vcpm++] = (char)c;
    }

    if (ferror(pfinput)) {
        perror(argv[1]);
        goto done;
    }

    // the file may end without a trailing newline: flush the last record
    if (lineOpen) {
        pTempValHolder[vcpm] = '\0';
        printf("%s\n", pTempValHolder);
    }
    status = EXIT_SUCCESS;

done:
    fclose(pfinput);
    free(pTempValHolder);
    return status;
}