@@ -0,0 +1,9 @@ | |||
# Haplous | |||
The Haplous parser, separated from libheb12 in case it is needed on its own.
This version adds benchmarks and fixes a memory leak (a missing free() in the test).
# Building | |||
``` | |||
wget http://api.heb12.com/translations/haplous/kjv.txt | |||
cc *.c; ./a.out | |||
``` |
@@ -0,0 +1,198 @@ | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include "haplous.h" | |||
// seeks through a file line by line (with fgets) until it gets to the requested | |||
// book | |||
static int haplous_work_book_seek(FILE *file, char *id) | |||
{ | |||
int n = 0; | |||
char line[17]; // #book:exod (ids can be up to 10 characters long) | |||
while (fgets(line, 17, file)) { | |||
if (line[0] == '\n') { | |||
n++; | |||
continue; | |||
} | |||
if (strncmp(line, "#book:", 6) == 0) { | |||
for (size_t i = 0; i != 10; ++i) { | |||
if (line[i + 6] != id[i] || i == strlen(id)) { | |||
goto continue_while; | |||
} | |||
if (line[i + 7] == '\n') { | |||
return n; | |||
} | |||
} | |||
} | |||
continue_while: | |||
n++; | |||
} | |||
return HAPLOUS_REF_NOT_FOUND; | |||
} | |||
// run haplous_work_book_find first | |||
static int haplous_work_chapter_seek(FILE *file, size_t chapter) | |||
{ | |||
int n = 0; | |||
char line[13]; // #chapter:150 | |||
size_t c = 0; | |||
while (fgets(line, 13, file)) { | |||
if (strncmp(line, "#chapter:", 9) == 0) { | |||
c++; // increase it first since "chapter" starts at 1 | |||
if (c == chapter) { | |||
return n; | |||
} | |||
} | |||
n++; | |||
} | |||
return HAPLOUS_REF_NOT_FOUND; | |||
} | |||
// Get a range of verses | |||
// Allocates memory which caller owns in the end | |||
// TODO support custom allocators | |||
char *haplous_work_verses_get(FILE *file, struct haplous_reference ref, | |||
int *err) | |||
{ | |||
if (ref.verse_start == 0) { | |||
*err = HAPLOUS_INVALID_REF; | |||
return NULL; | |||
} | |||
if (ref.verse_start > ref.verse_end) { | |||
*err = HAPLOUS_INVALID_REF; | |||
return NULL; | |||
} | |||
size_t buf_size = 1000; | |||
char *buffer = calloc(buf_size, sizeof(char)); | |||
if (file == NULL) { | |||
*err = HAPLOUS_WORK_NOT_FOUND; | |||
return NULL; | |||
} | |||
fseek(file, 0, SEEK_SET); | |||
int line = 0; | |||
line = haplous_work_book_seek(file, ref.id); | |||
line = haplous_work_chapter_seek(file, ref.chapter); | |||
if (line < 0) { | |||
*err = HAPLOUS_REF_NOT_FOUND; | |||
return NULL; | |||
} | |||
int prev = '\0'; | |||
int c; | |||
size_t bufi = 0; | |||
size_t verse = 1; | |||
while ((c = getc(file)) != EOF) { | |||
// detect end of the chapter | |||
if (prev == '\n' && c == '^') { | |||
// if at the end of the chapter, make sure it has found | |||
// all the required verses | |||
// TODO test | |||
if (verse < ref.verse_end) { | |||
*err = HAPLOUS_END_TOO_BIG; // TODO maybe | |||
// OUT_OF_RANGE? | |||
return NULL; | |||
} | |||
} | |||
if (verse >= ref.verse_start && verse <= ref.verse_end) { | |||
if (bufi >= buf_size) { | |||
// Increase by 2 to make sure there's always | |||
// space for the NULL terminator | |||
buf_size += 2; | |||
buffer = realloc(buffer, buf_size); | |||
if (buffer == NULL) { | |||
*err = HAPLOUS_OUT_OF_MEMORY; | |||
return buffer; | |||
} | |||
} | |||
buffer[bufi] = (char)c; | |||
bufi++; | |||
} else if (verse >= ref.verse_end) { | |||
break; | |||
} | |||
if (c == '\n') { | |||
verse++; | |||
} | |||
prev = c; | |||
} | |||
buffer[bufi] = '\0'; | |||
*err = HAPLOUS_OK; | |||
return buffer; | |||
} | |||
// Get text from a full chapter separated by "\n" | |||
// disregards any verse information in ref | |||
// returns HAPLOUS_REF_NOT_FOUND upon errors | |||
// Allocates same as haplous_work_verses_get | |||
// It can return NULL, but never when an error is not set | |||
// TODO figure out error handling again | |||
// TODO decide whether or not to initialize buffer within or ourside of function | |||
char *haplous_work_chapter_get(FILE *file, struct haplous_reference ref, | |||
int *err) | |||
{ | |||
if (ref.chapter == 0) { | |||
*err = HAPLOUS_INVALID_REF; | |||
return NULL; | |||
} | |||
if (file == NULL) { | |||
*err = HAPLOUS_WORK_NOT_FOUND; | |||
return NULL; | |||
} | |||
fseek(file, 0, SEEK_SET); | |||
int line = 0; | |||
line = haplous_work_book_seek(file, ref.id); | |||
line = haplous_work_chapter_seek(file, ref.chapter); | |||
if (line < 0) { | |||
*err = HAPLOUS_REF_NOT_FOUND; | |||
return NULL; | |||
} | |||
size_t buf_size = 1000; | |||
char *buffer = malloc(buf_size); | |||
if (buffer == NULL) { | |||
*err = HAPLOUS_OUT_OF_MEMORY; | |||
return NULL; | |||
} | |||
int c; | |||
size_t i = 0; | |||
while ((c = getc(file)) != EOF) { | |||
if (c == '^') { | |||
break; | |||
} | |||
if (i >= buf_size) { | |||
buf_size += 1; | |||
buffer = realloc(buffer, buf_size); | |||
if (buffer == NULL) { | |||
*err = HAPLOUS_OUT_OF_MEMORY; | |||
return buffer; | |||
} | |||
} | |||
buffer[i] = (char)c; | |||
i++; | |||
} | |||
buffer[i] = '\0'; | |||
*err = HAPLOUS_OK; | |||
return buffer; | |||
} |
@@ -0,0 +1,71 @@ | |||
#include <stdbool.h> | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include "haplous.h" | |||
// Parses the exact strings "true" and "false".
// Any other input stores -1 in *error and yields false; *error is left
// untouched when parsing succeeds.
static bool strbool(const char *str, int *error)
{
	const bool is_true = strcmp(str, "true") == 0;

	if (is_true || strcmp(str, "false") == 0) {
		return is_true;
	}

	*error = -1;
	return false;
}
static void check_or_err(const char *string, int *err) | |||
{ | |||
if (strcmp(string, "") == 0) { | |||
*err = HAPLOUS_META_MISSING; | |||
} | |||
} | |||
struct haplous_work haplous_work_init(const char *path, int *error) | |||
{ | |||
struct haplous_work w; | |||
w.path = path; | |||
w.file = fopen(path, "r"); | |||
if (w.file == NULL) { | |||
*error = HAPLOUS_WORK_NOT_FOUND; | |||
return w; | |||
} | |||
// strcpy is fine here because haplous_work_metadata_get ensures a NULL | |||
// terminator | |||
// TODO make error handling less verbose | |||
strcpy(w.metadata.lang, haplous_work_metadata_get(w.file, "lang")); | |||
check_or_err(w.metadata.lang, error); | |||
strcpy(w.metadata.title, haplous_work_metadata_get(w.file, "title")); | |||
check_or_err(w.metadata.title, error); | |||
char *public_domain_str = | |||
haplous_work_metadata_get(w.file, "public_domain"); | |||
check_or_err(public_domain_str, error); | |||
int err = 0; | |||
w.metadata.public_domain = strbool(public_domain_str, &err); | |||
if (err != 0) | |||
*error = HAPLOUS_META_MISSING; | |||
strcpy(w.metadata.id, haplous_work_metadata_get(w.file, "id")); | |||
check_or_err(w.metadata.id, error); | |||
strcpy(w.metadata.type, haplous_work_metadata_get(w.file, "type")); | |||
check_or_err(public_domain_str, error); | |||
*error = HAPLOUS_OK; | |||
return w; | |||
} | |||
// Free buffers and close files | |||
int haplous_work_cleanup(struct haplous_work *w) | |||
{ | |||
fclose(w->file); | |||
return 0; | |||
} |
@@ -0,0 +1,66 @@ | |||
#ifndef HAPLOUS_H_ | |||
#define HAPLOUS_H_ | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
#include <stdio.h> | |||
#include <stdbool.h> | |||
// Size limit taken from the spec.
// Used as the array size of every string field in haplous_work_metadata and
// as the declared size of the id passed to haplous_work_metadata_get().
#define MAX_ID_LEN 20
// Status codes reported through the `int *` error parameters.
// Zero is success; every failure is negative.
enum haplous_error {
	// Success.
	HAPLOUS_OK              = 0,
	// Unclassified failure (not raised by the code in this library's
	// visible sources).
	HAPLOUS_OTHER_ERROR     = -1,
	// The reference itself is malformed: chapter or verse_start is 0, or
	// verse_start is greater than verse_end.
	HAPLOUS_INVALID_REF     = -2,
	// The requested book or chapter was not found in the work.
	HAPLOUS_REF_NOT_FOUND   = -3,
	// The chapter ended before verse_end was reached.
	HAPLOUS_END_TOO_BIG     = -4,
	// Presumably signals a malformed work file; not raised by the visible
	// sources, confirm before relying on it.
	HAPLOUS_INVALID_HAPLOUS = -5,
	// A required metadata field is missing, empty, or invalid.
	HAPLOUS_META_MISSING    = -6,
	// Presumably signals that a book was reached while looking for
	// metadata; not raised by the visible sources, confirm before relying
	// on it.
	HAPLOUS_META_FOUND_BOOK = -7,
	// The work file could not be opened, or a NULL file was supplied.
	HAPLOUS_WORK_NOT_FOUND  = -8,
	// A memory allocation failed.
	HAPLOUS_OUT_OF_MEMORY   = -9,
};
// Identifies a passage inside a work.
// Field order matters: callers initialize this struct positionally.
struct haplous_reference {
	// Book id, matched against the work's "#book:<id>" lines.
	char *id;
	// Chapter within the book, counted from 1 (0 is rejected).
	size_t chapter;
	// First verse of the range, counted from 1 (0 is rejected).
	size_t verse_start;
	// Last verse of the range, inclusive; must not be below verse_start.
	size_t verse_end;
};
struct haplous_work_metadata { | |||
char lang[MAX_ID_LEN]; | |||
char title[MAX_ID_LEN]; | |||
char id[MAX_ID_LEN]; | |||
bool public_domain; | |||
char type[MAX_ID_LEN]; | |||
// this is only the required metadata, | |||
// other metadata may be available via work_metadata_get(id) | |||
}; | |||
struct haplous_work { | |||
const char *path; | |||
FILE *file; | |||
struct haplous_work_metadata metadata; | |||
}; | |||
struct haplous_work haplous_work_init(const char *, int *); | |||
int haplous_work_cleanup(struct haplous_work *); | |||
char *haplous_work_chapter_get(FILE *, struct haplous_reference, int *); | |||
char *haplous_work_verses_get(FILE *, struct haplous_reference, int *); | |||
char *haplous_work_metadata_get(FILE *, const char[MAX_ID_LEN]); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif |
@@ -0,0 +1,50 @@ | |||
// Getting metadata and other information from a Work | |||
#include <stdio.h> | |||
#include <string.h> | |||
#include "haplous.h" | |||
// Gets metadata in the form of `#id:value` from a work | |||
// Returns empty string if the ID is not found before a book | |||
// or if the associated value is empty | |||
char *haplous_work_metadata_get(FILE *file, const char id[MAX_ID_LEN]) | |||
{ | |||
fseek(file, 0, SEEK_SET); | |||
char line[MAX_ID_LEN * 2 + 1]; | |||
static char value[MAX_ID_LEN + 1]; | |||
strcpy(value, ""); | |||
char cmpid[MAX_ID_LEN + 2]; | |||
strcpy(cmpid, "#"); | |||
strncat(cmpid, id, MAX_ID_LEN); | |||
while (fgets(line, MAX_ID_LEN * 2 + 1, file)) { | |||
if (strncmp(line, "#book", 5) == 0) { | |||
break; | |||
} | |||
size_t index = 0; | |||
while (line[index] != '\0' && index < MAX_ID_LEN + 2) { | |||
if (line[index] == ':') | |||
break; | |||
index++; | |||
} | |||
// index must be greater than 2 to allow for at least #i:v | |||
if (index > 2 && strncmp(line, cmpid, index - 1) == 0) { | |||
size_t i = index + 1; | |||
while (i < index + MAX_ID_LEN && line[i] != '\0' | |||
&& line[i] != '\n') { | |||
value[i - index - 1] = line[i]; | |||
i++; | |||
} | |||
value[i - index - 1] = '\0'; | |||
break; | |||
} | |||
} | |||
value[MAX_ID_LEN] = '\0'; // just in case | |||
return value; | |||
} |
@@ -0,0 +1,50 @@ | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <assert.h> | |||
#include <time.h> | |||
#include "haplous.h" | |||
// Work under test: assigned in main() and read by testRef().
struct haplous_work work;
// Status code written by the most recent haplous call.
int err = 0;
void testRef(char *name, int chapter, int start, int to) | |||
{ | |||
struct haplous_reference ref = { | |||
name, | |||
chapter, | |||
start, | |||
to, | |||
}; | |||
char *text = | |||
haplous_work_verses_get(work.file, ref, &err); | |||
if (err != HAPLOUS_OK) { | |||
printf("Error parsing: %s %d %d:%d (%d)\n", name, chapter, start, to, err); | |||
} | |||
// Demo didn't have this - leaked a lot of memory.. | |||
free(text); | |||
} | |||
int main() | |||
{ | |||
work = haplous_work_init("kjv.txt", &err); | |||
if (err != HAPLOUS_OK) { | |||
puts("Err"); | |||
return -1; | |||
} | |||
clock_t start_time = clock(); | |||
for (int i = 0; i < 2; i++) { | |||
testRef("Rev", 1, 1, 1); | |||
} | |||
double elapsed_time = | |||
(double)(clock() - start_time) / CLOCKS_PER_SEC; | |||
printf("Done in %f seconds\n", elapsed_time); | |||
haplous_work_cleanup(&work); | |||
} |