Code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
/**
 * Read the next identifier from the stream.
 *
 * This is a small hand-written state machine, not a full C lexer. It skips
 * string literals, character constants, both comment styles, numeric
 * constants and whole preprocessor directives, and returns the next
 * identifier it finds outside of those. Identifiers follow the rules of
 * the C language standard:
 *
 * (6.4.2.1) identifier:
 *     identifier-nondigit
 *     identifier identifier-nondigit
 *     identifier digit
 *
 * (6.4.2.1) identifier-nondigit:
 *     nondigit
 *     universal-character-name
 *     other implementation-defined characters
 *
 * (6.4.2.1) nondigit: one of
 *     _ a b c d e f g h i j k l m
 *     n o p q r s t u v w x y z
 *     A B C D E F G H I J K L M
 *     N O P Q R S T U V W X Y Z
 *
 * (6.4.2.1) digit: one of
 *     0 1 2 3 4 5 6 7 8 9
 *
 * Only the nondigit and digit forms are recognised; universal character
 * names and implementation-defined characters are not.
 *
 * @param stream   input stream to scan; reading resumes where the previous
 *                 call stopped
 * @param buf      destination for the NUL-terminated identifier
 * @param bufsize  size of buf in bytes, including room for the terminator
 *
 * @return buf when an identifier was found, or NULL when the end of the
 *         stream (or a read error) was reached first, or when an argument
 *         is unusable (NULL pointer or bufsize of zero). An identifier
 *         longer than bufsize - 1 characters is truncated; its excess
 *         characters are discarded rather than returned by the next call.
 */
char *getIdentifier(FILE *stream, char *buf, size_t bufsize)
{
  enum {START, PREPROC, STRING, CHAR, SLASH, STAR, LINE_COMMENT,
        BLOCK_COMMENT, NUMBER, IDENTIFIER} state = START;
  size_t i = 0;
  int c;
  int done = 0;

  /* Without at least one byte there is no room for the terminator. */
  if (stream == NULL || buf == NULL || bufsize == 0)
    return NULL;

  while (!done && (c = fgetc(stream)) != EOF)
  {
    switch (state)
    {
      case START:
        if (c == '_' || isalpha(c))
        {
          if (i < bufsize - 1)
            buf[i++] = (char)c;
          state = IDENTIFIER;
        }
        else if (isdigit(c))
          state = NUMBER;
        else if (c == '/')
          state = SLASH;
        else if (c == '#')
          state = PREPROC;
        else if (c == '"')
          state = STRING;
        else if (c == '\'')
          state = CHAR;
        else if (c == '\\')
          fgetc(stream); /* Throw away the next character */
        break;

      case SLASH:
        if (c == '/')
          state = LINE_COMMENT;
        else if (c == '*')
          state = BLOCK_COMMENT;
        else
        {
          /*
           * A lone division operator. The character after it may start
           * an identifier, a string, etc., so let START look at it again.
           */
          ungetc(c, stream);
          state = START;
        }
        break;

      case LINE_COMMENT:
        if (c == '\\')
          fgetc(stream); /* backslash-newline continues the comment */
        else if (c == '\n')
          state = START;
        break;

      case BLOCK_COMMENT:
        if (c == '*')
          state = STAR;
        break;

      case STAR:
        if (c == '/')
          state = START;
        else if (c != '*')
          state = BLOCK_COMMENT;
        /* Another asterisk: the next slash still closes the comment. */
        break;

      case PREPROC:
        if (c == '\\')
          fgetc(stream); /* backslash-newline continues the directive */
        else if (c == '\n')
          state = START;
        break;

      case NUMBER:
        /*
         * Swallow the whole numeric constant so that pieces such as the
         * "x1F" of a hexadecimal constant are not reported as identifiers.
         */
        if (!(c == '_' || c == '.' || isalnum(c)))
        {
          ungetc(c, stream);
          state = START;
        }
        break;

      case IDENTIFIER:
        if (c == '_' || isalnum(c))
        {
          /* Keep consuming an overlong identifier, but stop storing it. */
          if (i < bufsize - 1)
            buf[i++] = (char)c;
        }
        else
        {
          /*
           * The terminating character belongs to the next token (it may
           * open a string, a character constant or a comment), so push
           * it back for the next call.
           */
          ungetc(c, stream);
          done = 1;
        }
        break;

      case STRING:
        if (c == '\\')
          fgetc(stream); /* Throw away the next character */
        else if (c == '"')
          state = START;
        break;

      case CHAR:
        if (c == '\\')
          fgetc(stream); /* Throw away the next character */
        else if (c == '\'')
          state = START;
        break;

      default:
        break;
    }
  }

  /* Any state other than IDENTIFIER means the input ran out first. */
  if (state != IDENTIFIER)
    return NULL;

  buf[i] = '\0';
  return buf;
}
/**
 * Open the specified file and scan it for keywords.
 *
 * Usage: program sourcefile
 *
 * Every identifier returned by getIdentifier() is compared against the
 * table of C keywords below. After the whole file has been read, the
 * keywords that occurred at least once are printed with their counts,
 * followed by the overall total.
 *
 * Returns 0 on success; exits with EXIT_FAILURE when no file name was
 * given, the file cannot be opened, or a read error occurred.
 */
int main(int argc, char **argv)
{
  struct keywordEntry {
    const char *keyword; /* points at a string literal; never modified */
    size_t count;
  };
  FILE *sourceFile;
  char buffer[81]; /* no individual token should be longer than 80 chars */
  size_t total = 0;
  size_t i;
  /* Keyword table; the entry with a NULL keyword marks the end. */
  struct keywordEntry keywordTable[] = {
    {"auto", 0},
    {"break", 0},
    {"case", 0},
    {"char", 0},
    {"const", 0},
    {"continue", 0},
    {"default", 0},
    {"do", 0},
    {"double", 0},
    {"else", 0},
    {"enum", 0},
    {"extern", 0},
    {"float", 0},
    {"for", 0},
    {"goto", 0},
    {"if", 0},
    {"inline", 0},
    {"int", 0},
    {"long", 0},
    {"register", 0},
    {"restrict", 0},
    {"return", 0},
    {"short", 0},
    {"signed", 0},
    {"sizeof", 0},
    {"static", 0},
    {"struct", 0},
    {"switch", 0},
    {"typedef", 0},
    {"union", 0},
    {"unsigned", 0},
    {"void", 0},
    {"volatile", 0},
    {"while", 0},
    {"_Bool", 0},
    {"_Complex", 0},
    {"_Imaginary", 0},
    {NULL, 0}
  };

  if (argc < 2)
  {
    fprintf(stderr, "USAGE: %s sourcefile\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  sourceFile = fopen(argv[1], "r");
  if (!sourceFile)
  {
    fprintf(stderr, "Could not open file %s\n", argv[1]);
    exit(EXIT_FAILURE);
  }

  /*
   * Read the next identifier from the input stream.
   * If it matches one of the specified keywords,
   * update that keyword's count.
   */
  while (getIdentifier(sourceFile, buffer, sizeof buffer))
  {
    for (i = 0; keywordTable[i].keyword != NULL; i++)
    {
      if (strcmp(buffer, keywordTable[i].keyword) == 0)
      {
        keywordTable[i].count++;
        total++;
        break; /* table entries are unique, so stop at the first match */
      }
    }
  }

  /* getIdentifier() returns NULL for both end of file and read errors. */
  if (ferror(sourceFile))
  {
    fprintf(stderr, "Error while reading %s; aborting\n", argv[1]);
    fclose(sourceFile);
    exit(EXIT_FAILURE);
  }
  fclose(sourceFile);

  printf("%20s%10s\n", "Keyword", "Count");
  printf("%20s%10s\n", "-------", "-----");
  for (i = 0; keywordTable[i].keyword != NULL; i++)
  {
    /* The counts are size_t, so they must be printed with %zu, not %d. */
    if (keywordTable[i].count > 0)
      printf("%20s%10zu\n", keywordTable[i].keyword, keywordTable[i].count);
  }
  printf("\nTotal: %zu\n", total);

  return 0;
}
And here are the results of running the program on its own source text: