Browse Source

Initial

master
Daniel 1 year ago
commit
454e8a00ab
6 changed files with 444 additions and 0 deletions
  1. +9
    -0
      README.md
  2. +198
    -0
      get.c
  3. +71
    -0
      haplous.c
  4. +66
    -0
      haplous.h
  5. +50
    -0
      info.c
  6. +50
    -0
      test.c

+ 9
- 0
README.md View File

@@ -0,0 +1,9 @@
# Haplous
The Haplous parser, separated from libheb12 for standalone use.
Adds benchmarks, and fixes a memory leak (missing free() in test).

# Building
```
wget http://api.heb12.com/translations/haplous/kjv.txt
cc *.c; ./a.out
```

+ 198
- 0
get.c View File

@@ -0,0 +1,198 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "haplous.h"

// Scans forward from the current position of `file` for the header line
// "#book:<id>" and leaves the stream positioned on the line that follows it.
//
// The header has to match `id` exactly: an id that merely shares a prefix
// with the book name is rejected, and so is a header that is not terminated
// by a newline. Ids may be up to BOOK_ID_MAX characters long; empty or longer
// ids never match.
//
// Returns the number of complete lines skipped before the header, or
// HAPLOUS_REF_NOT_FOUND when the end of the file is reached first (the stream
// is then left at EOF).
static int haplous_work_book_seek(FILE *file, const char *id)
{
	enum { BOOK_ID_MAX = 10 };
	static const char prefix[] = "#book:";
	const size_t prefix_len = sizeof prefix - 1;
	const size_t id_len = strlen(id);

	// prefix + id + '\n' + terminator, so that a maximum-length header is
	// always returned by a single fgets call
	char line[sizeof prefix + BOOK_ID_MAX + 1];

	int n = 0;
	int at_line_start = 1;

	while (fgets(line, sizeof line, file)) {
		const size_t len = strlen(line);
		const int starts_line = at_line_start;
		const int ends_line = len > 0 && line[len - 1] == '\n';

		// the next chunk begins a new line only if this one ended one
		at_line_start = ends_line;

		// A header always fits in one chunk, so only chunks that hold
		// a whole line are candidates; pieces of long text lines are
		// skipped without being inspected.
		if (starts_line && ends_line && id_len > 0
		    && len == prefix_len + id_len + 1
		    && strncmp(line, prefix, prefix_len) == 0
		    && memcmp(line + prefix_len, id, id_len) == 0) {
			return n;
		}

		if (ends_line) {
			n++;
		}
	}

	return HAPLOUS_REF_NOT_FOUND;
}

// run haplous_work_book_find first
static int haplous_work_chapter_seek(FILE *file, size_t chapter)
{
int n = 0;
char line[13]; // #chapter:150

size_t c = 0;

while (fgets(line, 13, file)) {
if (strncmp(line, "#chapter:", 9) == 0) {
c++; // increase it first since "chapter" starts at 1
if (c == chapter) {
return n;
}
}

n++;
}

return HAPLOUS_REF_NOT_FOUND;
}

// Returns the text of verses ref.verse_start..ref.verse_end (1-based,
// inclusive) of ref.chapter in book ref.id. Each verse is one line of the
// work and keeps its trailing newline.
//
// On success *err is HAPLOUS_OK and the result is a NUL-terminated heap
// buffer owned by the caller (release it with free()). On failure NULL is
// returned, nothing is leaked, and *err is one of:
//   HAPLOUS_INVALID_REF    - NULL id, chapter 0, verse_start 0, or
//                            verse_start > verse_end
//   HAPLOUS_WORK_NOT_FOUND - file is NULL or cannot be rewound
//   HAPLOUS_REF_NOT_FOUND  - the book or the chapter does not exist
//   HAPLOUS_END_TOO_BIG    - the chapter ends before verse_end
//   HAPLOUS_OUT_OF_MEMORY  - allocation failed
// TODO support custom allocators
char *haplous_work_verses_get(FILE *file, struct haplous_reference ref,
			      int *err)
{
	if (ref.id == NULL || ref.chapter == 0 || ref.verse_start == 0
	    || ref.verse_start > ref.verse_end) {
		*err = HAPLOUS_INVALID_REF;
		return NULL;
	}

	if (file == NULL) {
		*err = HAPLOUS_WORK_NOT_FOUND;
		return NULL;
	}

	// every lookup starts from the top of the work
	if (fseek(file, 0, SEEK_SET) != 0) {
		*err = HAPLOUS_WORK_NOT_FOUND;
		return NULL;
	}

	// both seeks have to succeed; the chapter is searched for only after
	// the book was found
	if (haplous_work_book_seek(file, ref.id) < 0
	    || haplous_work_chapter_seek(file, ref.chapter) < 0) {
		*err = HAPLOUS_REF_NOT_FOUND;
		return NULL;
	}

	// allocate only once nothing else can fail early
	size_t buf_size = 1000;
	char *buffer = malloc(buf_size);
	if (buffer == NULL) {
		*err = HAPLOUS_OUT_OF_MEMORY;
		return NULL;
	}

	int prev = '\n'; // the stream is expected to sit at the start of a line
	int c;
	size_t bufi = 0;
	size_t verse = 1; // number of the verse currently being read

	while (verse <= ref.verse_end && (c = getc(file)) != EOF) {
		// a '^' at the start of a line marks the end of the chapter
		if (prev == '\n' && c == '^') {
			break;
		}

		if (verse >= ref.verse_start) {
			// always keep one byte spare for the terminator
			if (bufi + 1 >= buf_size) {
				char *grown = NULL;
				if (buf_size <= (size_t)-1 / 2) {
					grown = realloc(buffer, buf_size * 2);
				}
				if (grown == NULL) {
					free(buffer);
					*err = HAPLOUS_OUT_OF_MEMORY;
					return NULL;
				}
				buffer = grown;
				buf_size *= 2;
			}
			buffer[bufi] = (char)c;
			bufi++;
		}

		if (c == '\n') {
			verse++;
		}

		prev = c;
	}

	// The loop stops early at the end of the chapter or of the file. The
	// range is satisfied only if the last requested verse was reached;
	// a final verse without a trailing newline still counts.
	if (verse <= ref.verse_end
	    && (prev == '\n' || verse < ref.verse_end)) {
		free(buffer);
		*err = HAPLOUS_END_TOO_BIG; // TODO maybe OUT_OF_RANGE?
		return NULL;
	}

	buffer[bufi] = '\0';

	*err = HAPLOUS_OK;
	return buffer;
}

// Returns the text of a whole chapter, one verse per line ("\n" separated).
// Only ref.id and ref.chapter are used; the verse fields are ignored.
//
// On success *err is HAPLOUS_OK and the result is a NUL-terminated heap
// buffer owned by the caller (release it with free()). On failure NULL is
// returned, nothing is leaked, and *err is one of:
//   HAPLOUS_INVALID_REF    - NULL id or chapter 0
//   HAPLOUS_WORK_NOT_FOUND - file is NULL or cannot be rewound
//   HAPLOUS_REF_NOT_FOUND  - the book or the chapter does not exist
//   HAPLOUS_OUT_OF_MEMORY  - allocation failed
// NULL is never returned with *err == HAPLOUS_OK.
// TODO decide whether to initialize buffer within or outside of function
char *haplous_work_chapter_get(FILE *file, struct haplous_reference ref,
			       int *err)
{
	if (ref.id == NULL || ref.chapter == 0) {
		*err = HAPLOUS_INVALID_REF;
		return NULL;
	}

	if (file == NULL) {
		*err = HAPLOUS_WORK_NOT_FOUND;
		return NULL;
	}

	// every lookup starts from the top of the work
	if (fseek(file, 0, SEEK_SET) != 0) {
		*err = HAPLOUS_WORK_NOT_FOUND;
		return NULL;
	}

	// both seeks have to succeed; the chapter is searched for only after
	// the book was found
	if (haplous_work_book_seek(file, ref.id) < 0
	    || haplous_work_chapter_seek(file, ref.chapter) < 0) {
		*err = HAPLOUS_REF_NOT_FOUND;
		return NULL;
	}

	size_t buf_size = 1000;
	char *buffer = malloc(buf_size);
	if (buffer == NULL) {
		*err = HAPLOUS_OUT_OF_MEMORY;
		return NULL;
	}

	int c;
	size_t i = 0;
	while ((c = getc(file)) != EOF) {
		// '^' ends the chapter.
		// NOTE(review): this stops at a '^' anywhere in the text,
		// whereas haplous_work_verses_get only honours it at the
		// start of a line - confirm which one the format intends.
		if (c == '^') {
			break;
		}

		// always keep one byte spare for the terminator
		if (i + 1 >= buf_size) {
			char *grown = NULL;
			if (buf_size <= (size_t)-1 / 2) {
				grown = realloc(buffer, buf_size * 2);
			}
			if (grown == NULL) {
				free(buffer);
				*err = HAPLOUS_OUT_OF_MEMORY;
				return NULL;
			}
			buffer = grown;
			buf_size *= 2;
		}

		buffer[i] = (char)c;
		i++;
	}

	buffer[i] = '\0';

	*err = HAPLOUS_OK;
	return buffer;
}

+ 71
- 0
haplous.c View File

@@ -0,0 +1,71 @@
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "haplous.h"

// Parses the literal strings "true" and "false".
// Returns the parsed value. For any other input the result is false and
// *error is set to -1; *error is left untouched when parsing succeeds.
static bool strbool(const char *str, int *error)
{
	const bool is_true = strcmp(str, "true") == 0;
	const bool is_false = !is_true && strcmp(str, "false") == 0;

	if (!is_true && !is_false) {
		*error = -1;
	}

	return is_true;
}

// Flags a missing metadata value: when `string` is empty, *err becomes
// HAPLOUS_META_MISSING. A non-empty string leaves *err untouched.
static void check_or_err(const char *string, int *err)
{
	const bool is_empty = string[0] == '\0';

	if (is_empty) {
		*err = HAPLOUS_META_MISSING;
	}
}

struct haplous_work haplous_work_init(const char *path, int *error)
{
struct haplous_work w;

w.path = path;
w.file = fopen(path, "r");
if (w.file == NULL) {
*error = HAPLOUS_WORK_NOT_FOUND;
return w;
}

// strcpy is fine here because haplous_work_metadata_get ensures a NULL
// terminator
// TODO make error handling less verbose
strcpy(w.metadata.lang, haplous_work_metadata_get(w.file, "lang"));
check_or_err(w.metadata.lang, error);

strcpy(w.metadata.title, haplous_work_metadata_get(w.file, "title"));
check_or_err(w.metadata.title, error);

char *public_domain_str =
haplous_work_metadata_get(w.file, "public_domain");
check_or_err(public_domain_str, error);

int err = 0;
w.metadata.public_domain = strbool(public_domain_str, &err);
if (err != 0)
*error = HAPLOUS_META_MISSING;

strcpy(w.metadata.id, haplous_work_metadata_get(w.file, "id"));
check_or_err(w.metadata.id, error);

strcpy(w.metadata.type, haplous_work_metadata_get(w.file, "type"));
check_or_err(public_domain_str, error);

*error = HAPLOUS_OK;
return w;
}

// Free buffers and close files
int haplous_work_cleanup(struct haplous_work *w)
{
fclose(w->file);
return 0;
}

+ 66
- 0
haplous.h View File

@@ -0,0 +1,66 @@
#ifndef HAPLOUS_H_
#define HAPLOUS_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdio.h>
#include <stdbool.h>

// numbers from the spec
#define MAX_ID_LEN 20

// Status codes reported by the haplous_* functions through their int *err
// parameter. Success is zero and every failure code is negative.
enum haplous_error {
// the operation succeeded
HAPLOUS_OK = 0,
// generic failure not covered by a more specific code
HAPLOUS_OTHER_ERROR = -1,
// the reference itself is malformed: chapter or verse_start is 0, or
// verse_start is greater than verse_end
HAPLOUS_INVALID_REF = -2,
// the requested book or chapter could not be located in the work
HAPLOUS_REF_NOT_FOUND = -3,
// the chapter ended before verse_end was reached
HAPLOUS_END_TOO_BIG = -4,

// NOTE(review): not raised by any code in this commit; presumably meant for
// a malformed work file - confirm
HAPLOUS_INVALID_HAPLOUS = -5,

// a required metadata field is empty or has an invalid value
HAPLOUS_META_MISSING = -6,
// NOTE(review): not raised by any code in this commit; presumably meant for
// a metadata lookup that ran into the first "#book" line - confirm
HAPLOUS_META_FOUND_BOOK = -7,

// the work file is NULL or could not be opened
HAPLOUS_WORK_NOT_FOUND = -8,

// a buffer could not be allocated or grown
HAPLOUS_OUT_OF_MEMORY = -9,
};

// Identifies a passage inside a work: a book, a chapter in that book and an
// inclusive range of verses in that chapter.
struct haplous_reference {
// book identifier, compared against the "#book:<id>" header lines
char *id;
// chapter number, counted from 1 (0 is rejected by haplous_work_chapter_get)
size_t chapter;
// first verse of the range, counted from 1 (0 is rejected)
size_t verse_start;
// last verse of the range, inclusive; must not be smaller than verse_start.
// haplous_work_chapter_get ignores both verse fields.
size_t verse_end;
};

// Required metadata of a work, filled in by haplous_work_init.
// Each text field is a fixed buffer of MAX_ID_LEN bytes including the
// terminating NUL.
struct haplous_work_metadata {
// value of the "lang" metadata key
char lang[MAX_ID_LEN];
// value of the "title" metadata key
char title[MAX_ID_LEN];
// value of the "id" metadata key
char id[MAX_ID_LEN];
// value of the "public_domain" metadata key, parsed from "true"/"false"
bool public_domain;
// value of the "type" metadata key
char type[MAX_ID_LEN];
// this is only the required metadata;
// other keys can be read with haplous_work_metadata_get(file, id)
};

// An opened work: where it came from, its open file and its metadata.
// Created by haplous_work_init and released with haplous_work_cleanup.
struct haplous_work {
// the path given to haplous_work_init; the pointer is stored, not copied
const char *path;
// stream opened for reading by haplous_work_init; NULL if opening failed
FILE *file;
// required metadata read from the file by haplous_work_init
struct haplous_work_metadata metadata;
};

// Opens the work at `path` and reads its required metadata; the outcome is
// reported through *error.
struct haplous_work haplous_work_init(const char *path, int *error);
// Closes the file that belongs to `w`.
int haplous_work_cleanup(struct haplous_work *w);

// Both getters return a heap-allocated, NUL-terminated string that the caller
// must free(), or NULL with *err set to a haplous_error code.
char *haplous_work_chapter_get(FILE *file, struct haplous_reference ref,
			       int *err);
char *haplous_work_verses_get(FILE *file, struct haplous_reference ref,
			      int *err);

// Looks up the metadata line "#<id>:<value>". The result points to a static
// buffer that the next call overwrites; it is an empty string when the key is
// not found.
char *haplous_work_metadata_get(FILE *file, const char id[MAX_ID_LEN]);

#ifdef __cplusplus
}
#endif

#endif

+ 50
- 0
info.c View File

@@ -0,0 +1,50 @@
// Getting metadata and other information from a Work
#include <stdio.h>
#include <string.h>
#include "haplous.h"

// Looks up a metadata line of the form "#<id>:<value>" in the header of a
// work, i.e. in the lines that precede the first "#book" line. The search
// always starts from the beginning of the file.
//
// The key has to match `id` exactly ("id" does not match "#idx:..."), and
// one-character keys are accepted.
//
// Returns a pointer to a static buffer holding the value without its
// trailing newline, truncated to MAX_ID_LEN - 1 characters. The buffer is
// overwritten by the next call, so copy the result if it has to be kept;
// the function is therefore not reentrant.
// Returns an empty string if `file` or `id` is NULL, if `id` is empty or
// longer than MAX_ID_LEN, if the key is not found before the first book, or
// if its value is empty.
char *haplous_work_metadata_get(FILE *file, const char id[MAX_ID_LEN])
{
	static char value[MAX_ID_LEN + 1];
	char line[MAX_ID_LEN * 2 + 1];
	int at_line_start = 1;

	value[0] = '\0';

	if (file == NULL || id == NULL) {
		return value;
	}

	const size_t id_len = strlen(id);
	if (id_len == 0 || id_len > MAX_ID_LEN) {
		return value;
	}

	if (fseek(file, 0, SEEK_SET) != 0) {
		return value;
	}

	while (fgets(line, sizeof line, file)) {
		const size_t len = strlen(line);
		const int starts_line = at_line_start;

		// the next chunk begins a new line only if this one ended one
		at_line_start = len > 0 && line[len - 1] == '\n';

		// Lines longer than the buffer arrive in several chunks; only
		// the first chunk of a line can carry the "#<id>:" key.
		if (!starts_line) {
			continue;
		}

		// metadata lives before the first book
		if (strncmp(line, "#book", 5) == 0) {
			break;
		}

		// require '#', then the whole key, then ':' directly after it
		if (line[0] != '#' || strncmp(line + 1, id, id_len) != 0
		    || line[1 + id_len] != ':') {
			continue;
		}

		const char *src = line + id_len + 2;
		size_t n = strcspn(src, "\n");
		if (n > MAX_ID_LEN - 1) {
			// keep the value small enough for the MAX_ID_LEN-sized
			// metadata fields it is copied into
			n = MAX_ID_LEN - 1;
		}
		memcpy(value, src, n);
		value[n] = '\0';
		break;
	}

	return value;
}

+ 50
- 0
test.c View File

@@ -0,0 +1,50 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>

#include "haplous.h"

// Work under test; assigned by main() and read by testRef().
struct haplous_work work;
// Status of the most recent haplous_* call, shared by main() and testRef().
int err = 0;

// Fetches verses start..to of `chapter` in book `name` from the global work
// and prints a diagnostic if the lookup fails. The text itself is discarded.
void testRef(char *name, int chapter, int start, int to)
{
	struct haplous_reference ref;
	ref.id = name;
	ref.chapter = chapter;
	ref.verse_start = start;
	ref.verse_end = to;

	char *text = haplous_work_verses_get(work.file, ref, &err);

	if (err != HAPLOUS_OK) {
		printf("Error parsing: %s %d %d:%d (%d)\n", name, chapter, start, to, err);
	}

	// the returned buffer belongs to the caller; free(NULL) is harmless
	// when the lookup failed
	free(text);
}

// Benchmark driver: opens kjv.txt, looks up the same reference twice and
// prints the CPU time the lookups took.
int main()
{
	work = haplous_work_init("kjv.txt", &err);
	if (err != HAPLOUS_OK) {
		puts("Err");
		return -1;
	}

	const clock_t begin = clock();

	int remaining = 2;
	while (remaining-- > 0) {
		testRef("Rev", 1, 1, 1);
	}

	const clock_t finish = clock();
	const double elapsed_time = (double)(finish - begin) / CLOCKS_PER_SEC;

	printf("Done in %f seconds\n", elapsed_time);
	haplous_work_cleanup(&work);
}

Loading…
Cancel
Save