From 1b403105e45fbee80df1a3d130de37f25791d7f9 Mon Sep 17 00:00:00 2001 From: Andre Leiradella Date: Wed, 7 Oct 2015 19:28:38 -0300 Subject: [PATCH] fixed plain converter; added lua converter back until we're sure the plain converter is working --- libretro-db/Makefile | 23 +- libretro-db/dat_converter | 3 +- libretro-db/dat_converter.lua | 216 +++++++++++ libretro-db/lexer.c | 274 ++++++------- libretro-db/lexer.h | 88 +++-- libretro-db/lua_common.c | 99 +++++ libretro-db/lua_common.h | 12 + libretro-db/parser.c | 254 ++++++------ libretro-db/parser.h | 19 +- libretro-db/plain_converter.c | 705 +++++++++++++++++++++++----------- 10 files changed, 1179 insertions(+), 514 deletions(-) create mode 100644 libretro-db/dat_converter.lua create mode 100644 libretro-db/lua_common.c create mode 100644 libretro-db/lua_common.h diff --git a/libretro-db/Makefile b/libretro-db/Makefile index 5d520e2d33..df9b6654b9 100644 --- a/libretro-db/Makefile +++ b/libretro-db/Makefile @@ -2,6 +2,20 @@ CFLAGS = -g -O2 LIBRETRO_COMMON_DIR := ../libretro-common INCFLAGS = -I. 
-I$(LIBRETRO_COMMON_DIR)/include
 
+LUA_CONVERTER_C = \
+	rmsgpack.c \
+	rmsgpack_dom.c \
+	lua_common.c \
+	libretrodb.c \
+	bintree.c \
+	query.c \
+	lua_converter.c \
+	$(LIBRETRO_COMMON_DIR)/compat/compat_fnmatch.c \
+	$(LIBRETRO_COMMON_DIR)/file/retro_file.c \
+	$(LIBRETRO_COMMON_DIR)/compat/compat.c
+
+LUA_CONVERTER_C_OBJS := $(LUA_CONVERTER_C:.c=.o)
+
 PLAIN_CONVERTER_C = \
 	lexer.c \
 	parser.c \
@@ -56,14 +70,17 @@ TESTLIB_FLAGS = ${CFLAGS} ${LUA_FLAGS} -shared -fpic
 
 .PHONY: all clean check
 
-all: rmsgpack_test libretrodb_tool plain_dat_converter
+all: rmsgpack_test libretrodb_tool plain_converter
 
 %.o: %.c
 	${CC} $(INCFLAGS) $< -c ${CFLAGS} -o $@
 
-plain_dat_converter: ${PLAIN_CONVERTER_C_OBJS}
+plain_converter: ${PLAIN_CONVERTER_C_OBJS}
 	${CC} $(INCFLAGS) ${PLAIN_CONVERTER_C_OBJS} -o $@
 
+lua_converter: ${LUA_CONVERTER_C_OBJS}
+	${CC} $(INCFLAGS) ${LUA_CONVERTER_C_OBJS} ${LUA_FLAGS} -o $@
+
 libretrodb_tool: ${RARCHDB_TOOL_OBJS}
 	${CC} $(INCFLAGS) ${RARCHDB_TOOL_OBJS} -o $@
 
@@ -80,4 +97,4 @@ clean:
 	rm -rf $(LIBRETRO_COMMON_DIR)/*.o
 	rm -rf $(LIBRETRO_COMMON_DIR)/compat/*.o
 	rm -rf $(LIBRETRO_COMMON_DIR)/file/*.o
-	rm -rf *.o rmsgpack_test plain_dat_converter libretrodb_tool testlib.so
+	rm -rf *.o rmsgpack_test plain_converter lua_converter libretrodb_tool testlib.so
diff --git a/libretro-db/dat_converter b/libretro-db/dat_converter
index 948c1ab115..1113b3869e 100755
--- a/libretro-db/dat_converter
+++ b/libretro-db/dat_converter
@@ -1,4 +1,5 @@
 #!/bin/sh
 rdb_file="$1"
 shift 1
-./plain_dat_converter "$rdb_file" dat_converter.lua "$@"
+# ./lua_converter "$rdb_file" dat_converter.lua "$@"
+./plain_converter "$rdb_file" "$@"
diff --git a/libretro-db/dat_converter.lua b/libretro-db/dat_converter.lua
new file mode 100644
index 0000000000..ab24782e5d
--- /dev/null
+++ b/libretro-db/dat_converter.lua
@@ -0,0 +1,262 @@
+--- DAT to RDB conversion script for lua_converter.
+-- Defines the globals init(...) and get_value(); get_value() hands out the
+-- parsed "game" entries one at a time and returns nothing once they run out.
+-- NOTE(review): uint() and binary() are not defined in this file; presumably
+-- the host registers them before running the script -- confirm.
+
+local dat_obj = {}
+local match_key = nil
+
+--- Build a token iterator over an open DAT file.
+-- @param f     file handle, read one line at a time
+-- @param fname file name recorded in every token location
+-- @return function returning `token, location` per call and nil at end of
+--         input; location is a table {line_no, column, fname}
+-- NOTE(review): the pattern only matches a token that is followed by
+-- whitespace, so a token ending its line with no trailing whitespace is
+-- dropped -- presumably inputs always end lines with "\r"; confirm.
+local function dat_lexer(f, fname)
+    local line = f:read("*l")
+    local location = {line_no = 1, column = 1, fname = fname}
+    return function()
+        local tok = nil
+        local tok_loc = nil
+        while not tok do
+            if not line then
+                return nil
+            end
+            -- declared local so the capture does not leak into the global table
+            local pre_space
+            pre_space, tok, line = string.match(line, "^(%s*)(..-)([()]*%s.*)")
+            if tok and string.match(tok, "^\"") then
+                -- quoted string: strip the quotes, keep the rest of the line
+                tok, line = string.match(tok..line, "^\"([^\"]-)\"(.*)")
+            elseif tok and string.match(tok, "^[()]") then
+                -- parenthesis glued to what follows: split off one character
+                line = tok:sub(2) .. line
+                tok = tok:sub(1,1)
+            end
+            location.column = location.column + #(pre_space or "")
+            tok_loc = {
+                line_no = location.line_no,
+                column = location.column,
+                fname = location.fname
+            }
+            if not line then
+                -- nothing matched on this line: continue with the next one
+                line = f:read("*l")
+                location.line_no = location.line_no + 1
+                location.column = 1
+            else
+                location.column = location.column + #tok
+            end
+        end
+        return tok, tok_loc
+    end
+end
+
+--- Parse the body of a parenthesised table; the opening "(" is already consumed.
+-- @param lexer     token iterator from dat_lexer
+-- @param start_loc location of the opening "(", reported if ")" never arrives
+-- @return table mapping keys to string values or nested tables
+local function dat_parse_table(lexer, start_loc)
+    local res = {}
+    local state = "key"
+    local key = nil
+    for tok, loc in lexer do
+        if state == "key" then
+            if tok == ")" then
+                return res
+            elseif tok == "(" then
+                error(string.format(
+                    "%s:%d:%d: fatal error: Unexpected '(' instead of key",
+                    loc.fname,
+                    loc.line_no,
+                    loc.column
+                ))
+            else
+                key = tok
+                state = "value"
+            end
+        else
+            if tok == "(" then
+                res[key] = dat_parse_table(lexer, loc)
+            elseif tok == ")" then
+                error(string.format(
+                    "%s:%d:%d: fatal error: Unexpected ')' instead of value",
+                    loc.fname,
+                    loc.line_no,
+                    loc.column
+                ))
+            else
+                res[key] = tok
+            end
+            state = "key"
+        end
+    end
+    error(string.format(
+        "%s:%d:%d: fatal error: Missing ')' for '('",
+        start_loc.fname,
+        start_loc.line_no,
+        start_loc.column
+    ))
+end
+
+--- Parse a whole DAT file and collect its top-level "game" tables.
+-- Every top-level entry must be `key ( ... )`; entries with another key are
+-- parsed and discarded.
+-- @param lexer token iterator from dat_lexer
+-- @return array of game tables in file order
+local function dat_parser(lexer)
+    local res = {}
+    local state = "key"
+    local skip = true
+    for tok, loc in lexer do
+        if state == "key" then
+            if tok == "game" then
+                skip = false
+            end
+            state = "value"
+        else
+            if tok == "(" then
+                local v = dat_parse_table(lexer, loc)
+                if not skip then
+                    table.insert(res, v)
+                    skip = true
+                end
+            else
+                error(string.format(
+                    "%s:%d:%d: fatal error: Expected '(' found '%s'",
+                    loc.fname,
+                    loc.line_no,
+                    loc.column,
+                    tok
+                ))
+            end
+            state = "key"
+        end
+    end
+    return res
+end
+
+--- Decode a hexadecimal string into raw bytes; nil passes through.
+local function unhex(s)
+    if not s then return nil end
+    return (s:gsub('..', function (c)
+        return string.char(tonumber(c, 16))
+    end))
+end
+
+--- Resolve a dotted key such as "rom.crc" inside an entry.
+-- @param mk dotted key
+-- @param t  entry table
+-- @return the value found, or nil when the final component is missing
+local function get_match_key(mk, t)
+    for p in string.gmatch(mk, "(%w+)[.]?") do
+        if p == nil or t == nil then
+            error("Invalid match key '"..mk.."'")
+        end
+        t = t[p]
+    end
+    return t
+end
+
+-- Copies every field of b into a (shallow; fields of b win). Stays on the
+-- global `table` library so existing users of table.update keep working.
+table.update = function(a, b)
+    for k,v in pairs(b) do
+        a[k] = v
+    end
+end
+
+--- Global entry point: parse every DAT file named in the arguments.
+-- The first argument is discarded. When more than one argument remains, the
+-- next one is a dotted match key and entries sharing its value are merged
+-- (later fields overwrite earlier ones). All remaining arguments are DAT paths.
+function init(...)
+    local args = {...}
+    table.remove(args, 1)
+    if #args == 0 then
+        -- level 0 keeps the message free of a position prefix
+        error("dat file argument is missing", 0)
+    end
+
+    if #args > 1 then
+        match_key = table.remove(args, 1)
+    end
+
+    local dat_hash = {}
+    for _, dat_path in ipairs(args) do
+        local dat_file, err = io.open(dat_path, "r")
+        if not dat_file then
+            error("could not open dat file '" .. dat_path .. "':" .. err)
+        end
+
+        print("Parsing dat file '" .. dat_path .. "'...")
+        -- parse under pcall so the file is closed even on a syntax error
+        local ok, objs = pcall(dat_parser, dat_lexer(dat_file, dat_path))
+        dat_file:close()
+        if not ok then
+            error(objs, 0)
+        end
+        for _, obj in ipairs(objs) do
+            if match_key then
+                local mk = get_match_key(match_key, obj)
+                if mk == nil then
+                    error("missing match key '" .. match_key .. "' in one of the entries")
+                end
+                if dat_hash[mk] == nil then
+                    dat_hash[mk] = {}
+                    table.insert(dat_obj, dat_hash[mk])
+                end
+                table.update(dat_hash[mk], obj)
+            else
+                table.insert(dat_obj, obj)
+            end
+        end
+    end
+end
+
+--- Pop one entry and convert it to the record stored in the database.
+-- @return record table, or nothing once every entry has been handed out
+function get_value()
+    local t = table.remove(dat_obj)
+    if not t then
+        return
+    end
+    -- entries without a `rom ( ... )` table must not abort the conversion
+    local rom = type(t.rom) == "table" and t.rom or {}
+    return {
+        name = t.name,
+        description = t.description,
+        rom_name = rom.name,
+        size = uint(tonumber(rom.size)),
+        users = uint(tonumber(t.users)),
+        releasemonth = uint(tonumber(t.releasemonth)),
+        releaseyear = uint(tonumber(t.releaseyear)),
+        rumble = uint(tonumber(t.rumble)),
+        analog = uint(tonumber(t.analog)),
+
+        famitsu_rating = uint(tonumber(t.famitsu_rating)),
+        edge_rating = uint(tonumber(t.edge_rating)),
+        edge_issue = uint(tonumber(t.edge_issue)),
+        edge_review = t.edge_review,
+
+        enhancement_hw = t.enhancement_hw,
+        barcode = t.barcode,
+        esrb_rating = t.esrb_rating,
+        elspa_rating = t.elspa_rating,
+        pegi_rating = t.pegi_rating,
+        cero_rating = t.cero_rating,
+        franchise = t.franchise,
+
+        developer = t.developer,
+        publisher = t.publisher,
+        origin = t.origin,
+
+        crc = binary(unhex(rom.crc)),
+        md5 = binary(unhex(rom.md5)),
+        sha1 = binary(unhex(rom.sha1)),
+        serial = binary(t.serial or rom.serial),
+    }
+end
diff --git a/libretro-db/lexer.c b/libretro-db/lexer.c
index 43482ebfd1..fa2f55257b 100644
--- a/libretro-db/lexer.c
+++ b/libretro-db/lexer.c
@@ -1,136 +1,138 @@
-#include
-#include
-
-#include "lexer.h"
-
-static void skip(lx_state_t* lexer)
-{
-   if ( lexer->current < lexer->end )
-      lexer->current++;
-}
-
-int lx_next(lx_state_t* lexer)
-{
-   /* skip spaces */
-   for ( ;; )
-   {
-      if ( isspace( *lexer->current ) )
-      {
-         if ( *lexer->current == '\n' )
-         {
-            lexer->line++;
-         }
-
-         skip( lexer );
-      }
-      else if ( *lexer->current != 0 )
-      {
-         break;
-      }
-      else
-      {
-         /* return LX_EOF if we've reached the end of the input */
-         lexer->start = "";
-         lexer->len = 5;
-         lexer->token = LX_EOF;
-         return 0;
-      }
-   }
-
-   lexer->start = 
lexer->current;
-
-   /* if the character is alphabetic or '_', the token is an identifier */
-   if ( isalpha( *lexer->current ) || *lexer->current == '_' )
-   {
-      /* get all alphanumeric and '_' characters */
-      do
-      {
-         skip( lexer );
-      }
-      while ( isalnum( *lexer->current ) || *lexer->current == '_' );
-
-      lexer->len = lexer->current - lexer->start;
-      lexer->token = LX_TAG;
-      return 0;
-   }
-
-   /* if the character is an hex digit, the token is a number */
-   if ( isxdigit( *lexer->current ) )
-   {
-      do
-      {
-         skip( lexer );
-      }
-      while ( isxdigit( *lexer->current ) );
-
-      if ( *lexer->current == '-' )
-      {
-         do
-         {
-            skip( lexer );
-         }
-         while ( isxdigit( *lexer->current ) );
-
-         lexer->token = LX_VERSION;
-      }
-      else
-      {
-         lexer->token = LX_NUMBER;
-      }
-
-      lexer->len = lexer->current - lexer->start;
-      return 0;
-   }
-
-   /* if the character is a quote, it's a string */
-   if ( *lexer->current == '"' )
-   {
-      /* get anything until another quote */
-      do
-      {
-         skip( lexer );
-
-         if ( *lexer->current == '"' && lexer->current[ -1 ] != '\\' )
-         {
-            break;
-         }
-      }
-      while ( lexer->current < lexer->end );
-
-      if ( lexer->current == lexer->end )
-      {
-         return LX_UNTERMINATED_STRING;
-      }
-
-      skip( lexer );
-      lexer->start++;
-      lexer->len = lexer->current - lexer->start - 1;
-      lexer->token = LX_STRING;
-      return 0;
-   }
-
-   /* otherwise the token is a symbol */
-   lexer->len = 1;
-
-   switch ( *lexer->current++ )
-   {
-   case '(':
-      lexer->token = LX_LPAREN;
-      return 0;
-   case ')':
-      lexer->token = LX_RPAREN;
-      return 0;
-   }
-
-   return LX_INVALID_CHARACTER;
-}
-
-void lx_new( lx_state_t* lexer, const char* source, unsigned srclen )
-{
-   lexer->line = 1;
-   lexer->current = source;
-   lexer->end = source + srclen;
-   lexer->start = NULL;
-   lexer->len = 0;
-   lexer->token = 0;
-}
+#include <ctype.h>
+#include <string.h>
+
+#include "lexer.h"
+
+/* advances one character, never moving past the end of the input */
+static void skip( lx_state_t* lexer )
+{
+  if ( lexer->current < lexer->end )
+    lexer->current++;
+}
+
+/* reads the next token into lexer->token and lexer->lexeme; returns 0 or a negative LX_* error code */
+int lx_next( lx_state_t* lexer )
+{
+  /* skip spaces */
+  for ( ;; )
+  {
+    if ( isspace( (unsigned char)*lexer->current ) )
+    {
+      if ( *lexer->current == '\n' )
+      {
+        lexer->line++;
+      }
+
+      skip( lexer );
+    }
+    else if ( *lexer->current != 0 )
+    {
+      break;
+    }
+    else
+    {
+      /* return LX_EOF if we've reached the end of the input */
+      lexer->lexeme.str = "<eof>";
+      lexer->lexeme.len = 5;
+      lexer->token = LX_EOF;
+      return 0;
+    }
+  }
+
+  lexer->lexeme.str = lexer->current;
+
+  /* if the last token was the "version" tag, parse anything until a blank or the end of the input */
+  if ( lexer->last_was_version )
+  {
+    do
+    {
+      skip( lexer );
+    }
+    while ( lexer->current < lexer->end && !isspace( (unsigned char)*lexer->current ) );
+
+    lexer->lexeme.len = lexer->current - lexer->lexeme.str;
+    lexer->token = LX_VERSION;
+    lexer->last_was_version = 0;
+    return 0;
+  }
+
+  /* if the character is alphabetic or '_', the token is an identifier */
+  if ( isalpha( (unsigned char)*lexer->current ) || *lexer->current == '_' )
+  {
+    /* get all alphanumeric and '_' characters */
+    do
+    {
+      skip( lexer );
+    }
+    while ( isalnum( (unsigned char)*lexer->current ) || *lexer->current == '_' );
+
+    lexer->lexeme.len = lexer->current - lexer->lexeme.str;
+    lexer->token = LX_TAG;
+    lexer->last_was_version = lexer->lexeme.len == 7 && !strncmp( lexer->lexeme.str, "version", 7 );
+    return 0;
+  }
+
+  /* if the character is an hex digit, the token is a number */
+  if ( isxdigit( (unsigned char)*lexer->current ) )
+  {
+    do
+    {
+      skip( lexer );
+    }
+    while ( isxdigit( (unsigned char)*lexer->current ) );
+
+    lexer->lexeme.len = lexer->current - lexer->lexeme.str;
+    lexer->token = LX_NUMBER;
+    return 0;
+  }
+
+  /* if the character is a quote, it's a string */
+  if ( *lexer->current == '"' )
+  {
+    /* get anything until another quote */
+    do
+    {
+      skip( lexer );
+
+      if ( *lexer->current == '"' && lexer->current[ -1 ] != '\\' )
+      {
+        break;
+      }
+    }
+    while ( lexer->current < lexer->end );
+
+    if ( lexer->current == lexer->end )
+    {
+      return LX_UNTERMINATED_STRING;
+    }
+
+    skip( lexer );
+    lexer->lexeme.str++;
+    lexer->lexeme.len = lexer->current - lexer->lexeme.str - 1;
+    lexer->token = LX_STRING;
+    return 0;
+  }
+
+  /* otherwise the token is a symbol */
+  lexer->lexeme.len = 1;
+
+  switch ( *lexer->current++ )
+  {
+  case '(': lexer->token = LX_LPAREN; return 0;
+  case ')': lexer->token = LX_RPAREN; return 0;
+  }
+
+  return LX_INVALID_CHARACTER;
+}
+
+/* resets the lexer to the start of source; lx_next() tests *current for 0, so source[ srclen ] must be readable */
+void lx_new( lx_state_t* lexer, const char* source, unsigned srclen )
+{
+  lexer->line = 1;
+  lexer->current = source;
+  lexer->end = source + srclen;
+  lexer->lexeme.str = NULL;
+  lexer->lexeme.len = 0;
+  lexer->token = 0;
+  lexer->last_was_version = 0;
+}
diff --git a/libretro-db/lexer.h b/libretro-db/lexer.h
index 63835b6cac..04bb4755c3 100644
--- a/libretro-db/lexer.h
+++ b/libretro-db/lexer.h
@@ -1,40 +1,48 @@
-#ifndef LEXER_H
-#define LEXER_H
-
-enum
-{
-   LX_UNTERMINATED_STRING = -1,
-   LX_INVALID_CHARACTER = -2,
-};
-
-enum
-{
-   LX_EOF = 256,
-   LX_TAG,
-   LX_NUMBER,
-   LX_STRING,
-   LX_VERSION,
-   LX_LPAREN,
-   LX_RPAREN,
-};
-
-typedef struct
-{
-   /* source code */
-   int line;
-
-   /* lexer state */
-   const char* current;
-   const char* end;
-
-   /* lookahead */
-   int token;
-   const char* start;
-   unsigned len;
-}
-lx_state_t;
-
-int lx_next( lx_state_t* lexer );
-void lx_new( lx_state_t* lexer, const char* source, unsigned srclen );
-
-#endif /* LEXER_H */
+#ifndef LEXER_H
+#define LEXER_H
+
+enum
+{
+  LX_UNTERMINATED_STRING = -1,
+  LX_INVALID_CHARACTER = -2,
+};
+
+enum
+{
+  LX_EOF = 256,
+  LX_TAG,
+  LX_NUMBER,
+  LX_STRING,
+  LX_VERSION,
+  LX_LPAREN,
+  LX_RPAREN,
+};
+
+typedef struct
+{
+  const char* str;
+  unsigned len;
+}
+lx_string_t;
+
+typedef struct
+{
+  /* source code */
+  int line;
+
+  /* lexer state */
+  const char* current;
+  const char* end;
+  int last_was_version;
+
+  /* lookahead */
+  int token;
+  lx_string_t lexeme;
+}
+lx_state_t;
+
+int lx_next( lx_state_t* lexer );
+void lx_new( lx_state_t* lexer, const char* source, unsigned srclen );
+
+#endif /* LEXER_H */
+
diff --git a/libretro-db/lua_common.c b/libretro-db/lua_common.c
new file mode 100644
index 0000000000..0334a571f6
--- /dev/null
+++ b/libretro-db/lua_common.c
@@ -0,0 +1,99 @@
+#include "lua_common.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/* Converts the Lua table reached through `index` into an RDT_MAP stored in `out`; returns 0, or -1 if the item array
+ * cannot be allocated. NOTE(review): lua_next() is given index - 1 after one push, so `index` is presumably negative. */
+int libretrodb_lua_to_rmsgpack_value(lua_State *L, int index,
+      struct rmsgpack_dom_value * out)
+{
+   size_t tmp_len;
+   lua_Number tmp_num;
+   struct rmsgpack_dom_value * tmp_value;
+   int i, rv = -1;
+   const char * tmp_string = NULL;
+   char * tmp_buff = NULL;
+   const int key_idx = -2, value_idx = -1;
+   const int MAX_FIELDS = 100;
+
+   out->type = RDT_MAP;
+   out->val.map.len = 0;
+   out->val.map.items = calloc(MAX_FIELDS, sizeof(struct rmsgpack_dom_pair));
+   if (!out->val.map.items)
+      return rv;
+   lua_pushnil(L);
+   while (lua_next(L, index - 1) != 0)
+   {
+      if (out->val.map.len >= MAX_FIELDS) /* items[] holds exactly MAX_FIELDS pairs */
+         printf("skipping due to too many keys\n");
+      else if (lua_type(L, key_idx) != LUA_TSTRING) /* lua_tostring() on a number key would rewrite it in place and break lua_next() */
+         printf("skipping non string key\n");
+      else if (lua_isnil(L, value_idx))
+         ; /* Skipping nil value fields to save disk space */
+      else
+      {
+         i = out->val.map.len;
+         tmp_buff = strdup(lua_tostring(L, key_idx));
+         out->val.map.items[i].key.type = RDT_STRING;
+         out->val.map.items[i].key.val.string.len = strlen(tmp_buff);
+         out->val.map.items[i].key.val.string.buff = tmp_buff;
+
+         tmp_value = &out->val.map.items[i].value;
+         switch (lua_type(L, value_idx))
+         {
+            case LUA_TNUMBER:
+               tmp_num = lua_tonumber(L, value_idx);
+               tmp_value->type = RDT_INT;
+               tmp_value->val.int_ = tmp_num;
+               break;
+            case LUA_TBOOLEAN:
+               tmp_value->type = RDT_BOOL;
+               tmp_value->val.bool_ = lua_toboolean(L, value_idx);
+               break;
+            case LUA_TSTRING:
+               tmp_buff = strdup(lua_tostring(L, value_idx));
+               tmp_value->type = RDT_STRING;
+               tmp_value->val.string.len = strlen(tmp_buff);
+               tmp_value->val.string.buff = tmp_buff;
+               break;
+            case LUA_TTABLE:
+               lua_getfield(L, value_idx, "binary");
+               if (!lua_isstring(L, -1))
+               {
+                  lua_pop(L, 1);
+                  lua_getfield(L, value_idx, "uint");
+                  if (!lua_isnumber(L, -1))
+                  {
+                     lua_pop(L, 1);
+                     goto set_nil;
+                  }
+                  else
+                  {
+                     tmp_num = lua_tonumber(L, -1);
+                     tmp_value->type = RDT_UINT;
+                     tmp_value->val.uint_ = tmp_num;
+                     lua_pop(L, 1);
+                  }
+               }
+               else
+               {
+                  tmp_string = lua_tolstring(L, -1, &tmp_len);
+                  tmp_buff = malloc(tmp_len);
+                  memcpy(tmp_buff, tmp_string, tmp_len);
+                  tmp_value->type = RDT_BINARY;
+                  tmp_value->val.binary.len = tmp_len;
+                  tmp_value->val.binary.buff = tmp_buff;
+                  lua_pop(L, 1);
+               }
+               break;
+            default:
+set_nil:
+               tmp_value->type = RDT_NULL;
+         }
+         out->val.map.len++;
+      }
+      lua_pop(L, 1);
+   }
+   return 0;
+}
diff --git a/libretro-db/lua_common.h b/libretro-db/lua_common.h
new file mode 100644
index 0000000000..9728d2c7c1
--- /dev/null
+++ b/libretro-db/lua_common.h
@@ -0,0 +1,12 @@
+#ifndef __RARCHDB_LUA_COMMON_H__
+#define __RARCHDB_LUA_COMMON_H__
+
+#include <lua.h>
+#include <lauxlib.h>
+#include <lualib.h>
+
+#include "rmsgpack_dom.h"
+
+int libretrodb_lua_to_rmsgpack_value(lua_State *L, int index, struct rmsgpack_dom_value *out);
+
+#endif
diff --git a/libretro-db/parser.c b/libretro-db/parser.c
index f42b8573ac..98e16e6818 100644
--- a/libretro-db/parser.c
+++ b/libretro-db/parser.c
@@ -1,155 +1,179 @@
 #include
 #include
-#include
-
 #include "parser.h"
 
 static void match_any( pr_state_t* parser )
 {
-   switch ( lx_next( &parser->lexer ) )
-   {
-   case LX_UNTERMINATED_STRING:
-      longjmp( parser->env, PR_UNTERMINATED_STRING );
-   case LX_INVALID_CHARACTER:
-      longjmp( parser->env, PR_INVALID_CHARACTER );
-   }
+  switch ( lx_next( &parser->lexer ) )
+  {
+  case LX_UNTERMINATED_STRING: longjmp( parser->env, PR_UNTERMINATED_STRING );
+  case LX_INVALID_CHARACTER: longjmp( parser->env, PR_INVALID_CHARACTER );
+  }
 }
 
 static void match( pr_state_t* parser, int token )
 {
-   if ( parser->lexer.token != token )
-      longjmp( parser->env, PR_UNEXPECTED_TOKEN );
-
-   match_any( parser );
+  if ( parser->lexer.token != token )
+  {
+    longjmp( parser->env, PR_UNEXPECTED_TOKEN );
+  }
+
+  match_any( parser );
 }
 
 static void match_tag( pr_state_t* parser, const char* tag )
 {
-   if ( parser->lexer.token != LX_TAG || strncmp( parser->lexer.start, tag, strlen( tag ) ) )
-      longjmp( parser->env, PR_UNEXPECTED_TOKEN );
-
-   match_any( parser );
+  if ( parser->lexer.token != 
LX_TAG || strncmp( parser->lexer.lexeme.str, tag, strlen( tag ) ) )
+  {
+    longjmp( parser->env, PR_UNEXPECTED_TOKEN );
+  }
+
+  match_any( parser );
 }
 
-static void parse_value( pr_state_t* parser, const char* key, unsigned keylen, pr_node_t* node, int isrom )
+static int cmp_keys( const pr_key_t* key1, const pr_key_t* key2 )
 {
-   unsigned i;
-
-   if ( isrom && keylen == 4 && !strncmp( key, "name", 4 ) )
-   {
-      key = "rom_name";
-      keylen = 8;
-   }
-
-   for ( i = 0; i < node->count; i++ )
-   {
-      if ( keylen == node->pairs[ i ].key_len && !strncmp( key, node->pairs[ i ].key, keylen ) )
-         break;
-   }
-
-   if ( i == node->count )
-      node->count++;
-
-   node->pairs[ i ].key = key;
-   node->pairs[ i ].key_len = keylen;
-
-   node->pairs[ i ].value = parser->lexer.start;
-   node->pairs[ i ].value_len = parser->lexer.len;
-
-   if ( parser->lexer.token == LX_STRING || parser->lexer.token == LX_NUMBER || parser->lexer.token == LX_TAG )
-      match_any( parser );
-   else
-      longjmp( parser->env, PR_UNEXPECTED_TOKEN );
+  if ( !key1 || !key2 )
+  {
+    return key1 == key2; /* chains are equal only if both end at the same depth */
+  }
+  if ( key1->key.len != key2->key.len || strncmp( key1->key.str, key2->key.str, key1->key.len ) )
+  {
+    return 0;
+  }
+  return cmp_keys( key1->prev, key2->prev ); /* same name here: compare the parent keys */
 }
 
-static void parse_map( pr_state_t* parser, int skip, int isrom )
+static pr_pair_t* find_key( pr_node_t* node, const pr_key_t* key )
 {
-   pr_node_t dummy;
-   pr_node_t* node;
+  unsigned i;
+
+  for ( i = 0; i < node->count; i++ )
+  {
+    if ( cmp_keys( key, &node->pairs[ i ].key ) )
+    {
+      return node->pairs + i;
+    }
+  }
+
+  /* not found: take a new slot, but never index past the fixed-size array;
+     once it is full, further new keys reuse (overwrite) the last slot */
+  return node->pairs + ( node->count < PR_MAX_FIELDS ? node->count++ : PR_MAX_FIELDS - 1 );
+}
 
-   if ( skip )
-   {
-      node = &dummy;
-      dummy.count = 0;
-   }
-   else
-      node = parser->node;
+static void parse_value( pr_state_t* parser, const pr_key_t* key, pr_node_t* node )
+{
+  pr_pair_t* pair = find_key( node, key );
+  int token = parser->lexer.token;
+
+  pair->key = *key;
+  pair->value = parser->lexer.lexeme;
+
+  if ( token == LX_STRING || token == LX_NUMBER || token == LX_VERSION || token == LX_TAG )
+  {
+    match_any( parser );
+  }
+  else
+  {
+    longjmp( parser->env, PR_UNEXPECTED_TOKEN );
+  }
+}
 
-   match( parser, LX_LPAREN );
-
-   while ( parser->lexer.token != LX_RPAREN )
-   {
-      unsigned hash;
-      const char* key;
-      unsigned keylen;
-
-      if ( parser->lexer.token != LX_TAG )
-         longjmp( parser->env, PR_UNEXPECTED_TOKEN );
-
-      key = parser->lexer.start;
-      keylen = parser->lexer.len;
-
-      hash = djb2_calculate(key);
-
-      match_any( parser );
-
-      switch ( hash )
-      {
-         case 0x0b88a693U: /* rom */
-            parse_map( parser, skip, 1 );
-            break;
-
-         default:
-            parse_value( parser, key, keylen, node, isrom );
-            break;
-      }
-   }
-
-   match_any( parser );
+static void parse_map( pr_state_t* parser, const pr_key_t* prev, int skip )
+{
+  pr_node_t dummy;
+  pr_node_t* node;
+  pr_key_t key;
+
+  if ( skip )
+  {
+    dummy.count = 0;
+    node = &dummy;
+  }
+  else
+  {
+    node = parser->node;
+  }
+
+  match( parser, LX_LPAREN );
+
+  while ( parser->lexer.token != LX_RPAREN )
+  {
+    key.key = parser->lexer.lexeme;
+    key.prev = prev;
+    match( parser, LX_TAG );
+
+    if ( parser->lexer.token == LX_LPAREN )
+    {
+      /* nested pairs store a pointer to this key, so it needs heap lifetime (intentionally never freed here) */
+      pr_key_t* parent = (pr_key_t*)malloc( sizeof( pr_key_t ) );
+      if ( parent == NULL )
+      {
+        longjmp( parser->env, PR_OUT_OF_MEMORY );
+      }
+      *parent = key;
+      parse_map( parser, parent, skip );
+    }
+    else
+    {
+      parse_value( parser, &key, node );
+    }
+  }
+
+  match_any( parser );
 }
 
 static void parse_clrmamepro( pr_state_t* parser )
 {
-   match_tag( parser, "clrmamepro" );
-   parse_map( parser, 1, 0 );
+  static const pr_key_t clrmamepro = { { "clrmamepro", 10 }, NULL };
+
+  match_tag( parser, clrmamepro.key.str );
+  parse_map( parser, &clrmamepro, 1 );
 }
 
 static void parse_game( pr_state_t* parser )
 {
-   match_tag( parser, "game" );
-
-   pr_node_t* node = (pr_node_t*)malloc( sizeof( pr_node_t ) );
-
-   if ( node == NULL )
-      longjmp( parser->env, PR_OUT_OF_MEMORY );
-
-   node->count = 0;
-   parser->node = node;
-   *parser->prev = node;
-   parser->prev = &node->next;
-   parse_map( parser, 0, 0 );
+  static const pr_key_t game = { { "game", 4 }, NULL };
+
+  match_tag( parser, game.key.str );
+
+  pr_node_t* node = (pr_node_t*)malloc( sizeof( pr_node_t ) );
+
+  if ( node == NULL )
+  {
+    longjmp( parser->env, PR_OUT_OF_MEMORY );
+  }
+
+  node->count = 0;
+  parser->node = node;
+  *parser->prev = node;
+  parser->prev = &node->next;
+  parse_map( parser, NULL, 0 );
 }
 
 void pr_new( pr_state_t* parser, const char* source, unsigned srclen )
 {
-   lx_new( &parser->lexer, source, srclen );
-   parser->prev = &parser->first;
+  lx_new( &parser->lexer, source, srclen );
+  parser->prev = &parser->first;
 }
 
 int pr_parse( pr_state_t* parser )
 {
-   int res;
-
-   if ( ( res = setjmp( parser->env ) ) == 0 )
-   {
-      match_any( parser );
-      parse_clrmamepro( parser );
-
-      while ( parser->lexer.token != LX_EOF )
-         parse_game( parser );
-   }
-
-   *parser->prev = NULL;
-   return res;
+  int res;
+
+  if ( ( res = setjmp( parser->env ) ) == 0 )
+  {
+    match_any( parser );
+    parse_clrmamepro( parser );
+
+    while ( parser->lexer.token != LX_EOF )
+    {
+      parse_game( parser );
+    }
+  }
+
+  *parser->prev = NULL;
+  return res;
 }
+
diff --git a/libretro-db/parser.h b/libretro-db/parser.h
index c807505334..8316abd6a1 100644
--- a/libretro-db/parser.h
+++ b/libretro-db/parser.h
@@ -5,6 +5,8 @@
 
 #include "lexer.h"
 
+#define PR_MAX_FIELDS 64
+
 enum
 {
   PR_UNTERMINATED_STRING = -1,
@@ -13,12 +15,18 @@ enum
   PR_OUT_OF_MEMORY = -4,
 };
 
+typedef struct pr_key_t pr_key_t;
+
+struct pr_key_t
+{
+  lx_string_t key;
+  const pr_key_t* prev;
+};
+
 typedef struct
 {
-  const char* key;
-  unsigned key_len;
-  const char* value;
-  unsigned value_len;
+  pr_key_t key;
+  lx_string_t value;
 }
 pr_pair_t;
 
@@ -26,7 +34,7 @@ typedef struct pr_node_t pr_node_t;
 
 struct pr_node_t
 {
-  pr_pair_t pairs[ 64 ];
+  pr_pair_t pairs[ PR_MAX_FIELDS ];
   unsigned count;
   pr_node_t* next;
 };
@@ -45,3 +53,4 @@ void pr_new( pr_state_t* parser, const char* source, unsigned srclen );
 int pr_parse( pr_state_t* parser );
 
 #endif /* PARSER_H */
+
diff --git a/libretro-db/plain_converter.c b/libretro-db/plain_converter.c
index abfeaab7f7..5499e2d111 100644
--- 
a/libretro-db/plain_converter.c +++ b/libretro-db/plain_converter.c @@ -1,214 +1,491 @@ -#include -#include -#include -#include -#include - -#include -#include - -#include "parser.h" -#include "libretrodb.h" - -static const char *printchar( pr_state_t* parser ) -{ - static char k[ 16 ]; - - if ( *parser->lexer.current < 32 ) - { - snprintf( k, sizeof( k ), "\\x%02x", (unsigned char)*parser->lexer.current ); - k[ sizeof( k ) - 1 ] = 0; - } - else - { - k[ 0 ] = *parser->lexer.current; - k[ 1 ] = 0; - } - - return k; -} - -static const char *printtoken( pr_state_t* parser ) -{ - static char k[ 256 ]; - char *aux = k; - const char *end = aux + sizeof( k ) - 1; - - while ( parser->lexer.len-- && aux < end ) - *aux++ = *parser->lexer.start++; - - *aux = 0; - return k; -} - -static char *dup_string( const char* str, unsigned len ) -{ - char *dup = (char*)malloc( len + 1 ); - - if (dup) - { - memcpy( (void*)dup, (const void*)str, len ); - dup[ len ] = 0; - } - - return dup; -} - -static unsigned char *dup_binary( const char* str, unsigned len ) -{ - char byte[3]; - unsigned char* dup = (unsigned char*)malloc( len / 2 ); - unsigned char* aux = dup; - - byte[ 2 ] = 0; - - if ( dup ) - { - len /= 2; - - while ( len-- ) - { - byte[ 0 ] = *str++; - byte[ 1 ] = *str++; - printf( "%s", byte ); - *aux++ = strtol( byte, NULL, 16 ); - } - printf( "\n" ); - } - - return dup; -} - -static int provider( void* ctx, struct rmsgpack_dom_value* out ) -{ - unsigned i; - pr_node_t **game_ptr = (pr_node_t**)ctx; - pr_node_t *game = *game_ptr; - - if (!game) - return 1; - - *game_ptr = game->next; - - out->type = RDT_MAP; - out->val.map.len = game->count; - out->val.map.items = calloc( game->count, sizeof(struct rmsgpack_dom_pair)); - - for ( i = 0; i < game->count; i++ ) - { - unsigned hash; - - out->val.map.items[ i ].key.type = RDT_STRING; - out->val.map.items[ i ].key.val.string.len = game->pairs[ i ].key_len; - out->val.map.items[ i ].key.val.string.buff = dup_string( game->pairs[ i 
].key, game->pairs[ i ].key_len ); - - hash = djb2_calculate(game->pairs[ i ].key); - - switch ( hash ) - { - case 0x0b88671dU: /* crc */ - case 0x0f3ea922U: /* crc32 */ - case 0x0b888fabU: /* md5 */ - case 0x7c9de632U: /* sha1 */ - out->val.map.items[ i ].value.type = RDT_BINARY; - out->val.map.items[ i ].value.val.binary.len = game->pairs[ i ].value_len / 2; - out->val.map.items[ i ].value.val.binary.buff = dup_binary( game->pairs[ i ].value, game->pairs[ i ].value_len ); - break; - - case 0x7c9dede0U: /* size */ - out->val.map.items[ i ].value.type = RDT_UINT; - out->val.map.items[ i ].value.val.uint_ = strtol( game->pairs[ i ].value, NULL, 10 ); - break; - - default: - out->val.map.items[ i ].value.type = RDT_STRING; - out->val.map.items[ i ].value.val.string.len = game->pairs[ i ].value_len; - out->val.map.items[ i ].value.val.string.buff = dup_string( game->pairs[ i ].value, game->pairs[ i ].value_len ); - break; - } - } - - return 0; -} - -int main( int argc, const char* argv[] ) -{ - char* source; - unsigned size; - pr_state_t parser; - pr_node_t* game; - pr_node_t* next; - RFILE *out, *file; - int res; - - if ( argc != 3 ) - { - fprintf( stderr, "usage:\ndatconv \n\n" ); - return 1; - } - - file = retro_fopen(argv[ 2 ], RFILE_MODE_READ, -1); - - if (!file) - { - fprintf( stderr, "Error opening DAT file: %s\n", argv[2] ); - return 1; - } - - retro_fseek(file, 0, SEEK_END ); - size = retro_ftell( file ); - retro_fseek( file, 0, SEEK_SET ); - - source = (char*)malloc( size + 1 ); - - if ( source == NULL ) - { - retro_fclose( file ); - fprintf( stderr, "Out of memory\n" ); - return 1; - } - - retro_fread(file, (void*)source, size); - retro_fclose( file ); - source[ size ] = 0; - - pr_new( &parser, source, size ); - res = pr_parse( &parser ); - - switch ( res ) - { - case PR_UNTERMINATED_STRING: - fprintf( stderr, "%s:%u: Unterminated string\n", "source", parser.lexer.line ); - break; - case PR_INVALID_CHARACTER: - fprintf( stderr, "%s:%u: Invalid character 
%s\n", "source", parser.lexer.line, printchar( &parser ) ); - break; - case PR_UNEXPECTED_TOKEN: - fprintf( stderr, "%s:%u: Unexpected token \"%s\"\n", "source", parser.lexer.line, printtoken( &parser ) ); - break; - case PR_OUT_OF_MEMORY: - fprintf( stderr, "%s:%u: Out of memory\n", "source", parser.lexer.line ); - break; - default: - game = parser.first; - out = retro_fopen( argv[ 1 ], RFILE_MODE_WRITE, -1); - - if (out) - { - res = libretrodb_create(out, &provider, (void*)&game ); - retro_fclose(out); - - while (game) - { - next = game->next; - free( game ); - game = next; - } - } - else - res = 1; - } - - free( source ); - return res; -} +#include +#include +#include +#include + +#include "parser.h" +#include "libretrodb.h" + +static int cmpkeystr_( const pr_key_t* key, const char** str ) +{ + int res = 1; + + if ( key->prev ) + { + res = cmpkeystr_( key->prev, str ); + } + + res = res && !strncmp( key->key.str, *str, key->key.len ); + ( *str ) += key->key.len + 1; + return res; +} + +static int cmpkeystr( const pr_key_t* key, const char* str ) +{ + return cmpkeystr_( key, &str ); +} + +static int cmpkeys( const pr_key_t* key1, const pr_key_t* key2 ) +{ + if ( !key1 && !key2 ) + { + return 1; + } + + if ( ( key1 && !key2 ) || ( !key1 && key2 ) ) + { + return 0; + } + + if ( key1->key.len != key2->key.len || strncmp( key1->key.str, key2->key.str, key1->key.len ) ) + { + return 0; + } + + return cmpkeys( key1->prev, key2->prev ); +} + +static const char* printchar( pr_state_t* parser ) +{ + static char k[ 16 ]; + + if ( *parser->lexer.current < 32 ) + { + snprintf( k, sizeof( k ), "\\x%02x", (unsigned char)*parser->lexer.current ); + k[ sizeof( k ) - 1 ] = 0; + } + else + { + k[ 0 ] = *parser->lexer.current; + k[ 1 ] = 0; + } + + return k; +} + +static void printkey( const pr_key_t* key ) +{ + int i; + + if ( key->prev ) + { + printkey( key->prev ); + printf( "." 
); + } + + for ( i = 0; i < key->key.len; i++ ) + { + printf( "%c", key->key.str[ i ] ); + } +} + +static void printstr( const lx_string_t* str ) +{ + int i; + + for ( i = 0; i < str->len; i++ ) + { + printf( "%c", str->str[ i ] ); + } +} + +static const char* printtoken( pr_state_t* parser ) +{ + static char k[ 256 ]; + char* aux = k; + const char* end = aux + sizeof( k ) - 1; + + while ( parser->lexer.lexeme.len-- && aux < end ) + { + *aux++ = *parser->lexer.lexeme.str++; + } + + *aux = 0; + return k; +} + +static char* dup_string( const lx_string_t* str ) +{ + char* dup = (char*)malloc( str->len + 1 ); + + if ( dup ) + { + memcpy( (void*)dup, (const void*)str->str, str->len ); + dup[ str->len ] = 0; + } + + return dup; +} + +static unsigned char* dup_hexa( const lx_string_t* str ) +{ + unsigned len = str->len / 2; + unsigned char* dup = (unsigned char*)malloc( len ); + unsigned char* aux = dup; + char byte[ 3 ]; + byte[ 2 ] = 0; + + if ( dup ) + { + const char* s = str->str; + + while ( len-- ) + { + byte[ 0 ] = *s++; + byte[ 1 ] = *s++; + *aux++ = strtol( byte, NULL, 16 ); + } + } + + return dup; +} + +static void* dup_memory( const lx_string_t* str ) +{ + void* dup = (unsigned char*)malloc( str->len ); + + if ( dup ) + { + memcpy( dup, (void*)str->str, str->len ); + } + + return dup; +} + +static void merge( pr_node_t* game, pr_node_t* game2, const char* key ) +{ + unsigned i, j, k, found, merged; + pr_node_t* game1; + pr_node_t* newgame; + pr_key_t* key1; + lx_string_t* value1; + pr_key_t* key2; + lx_string_t* value2; + + for ( ; game2; game2 = game2->next ) + { + value2 = NULL; + + for ( i = 0; i < game2->count; i++ ) + { + key2 = &game2->pairs[ i ].key; + + if ( cmpkeystr( key2, key ) ) + { + value2 = &game2->pairs[ i ].value; + break; + } + } + + if ( !value2 ) + { + continue; + } + + merged = 0; + + for ( game1 = game, value1 = NULL; game1; game1 = game1->next ) + { + for ( i = 0; i < game1->count; i++ ) + { + key1 = &game1->pairs[ i ].key; + + if ( 
cmpkeystr( key1, key ) ) + { + value1 = &game1->pairs[ i ].value; + + if ( value1->len == value2->len && !strncmp( value1->str, value2->str, value1->len ) ) + { + merged = 1; + + for ( k = 0; k < game2->count; k++ ) + { + key2 = &game2->pairs[ k ].key; + found = game1->count; + + for ( j = 0; j < game1->count; j++ ) + { + key1 = &game1->pairs[ j ].key; + + if ( cmpkeys( key1, key2 ) ) + { + found = j; + break; + } + } + + if ( found == game1->count ) + { + game1->pairs[ found ] = game2->pairs[ k ]; + game1->count++; + } + else if ( game2->pairs[ k ].value.len ) + { + /* only overwrite if value is not empty */ + game1->pairs[ found ] = game2->pairs[ k ]; + } + } + } + } + } + } + + if ( !merged ) + { + /* add */ + newgame = (pr_node_t*)malloc( sizeof( pr_node_t ) ); + *newgame = *game2; + newgame->next = game->next; + game->next = newgame; + } + } +} + +typedef struct +{ + const char* key; + const char* new_key; + int type; +} +mapping_t; + +enum +{ + TYPE_STRING, + TYPE_UINT, + TYPE_HEXA, + TYPE_RAW +} +type_t; + +static int provider( void* ctx, struct rmsgpack_dom_value* out ) +{ + static mapping_t map[] = + { + { "name", "name", TYPE_STRING }, + { "description", "description", TYPE_STRING }, + { "rom.name", "rom_name", TYPE_STRING }, + { "rom.size", "size", TYPE_UINT }, + { "users", "users", TYPE_UINT }, + { "releasemonth", "releasemonth", TYPE_UINT }, + { "releaseyear", "releaseyear", TYPE_UINT }, + { "rumble", "rumble", TYPE_UINT }, + { "analog", "analog", TYPE_UINT }, + { "famitsu_rating", "famitsu_rating", TYPE_UINT }, + { "edge_rating", "edge_rating", TYPE_UINT }, + { "edge_issue", "edge_issue", TYPE_UINT }, + { "edge_review", "edge_review", TYPE_STRING }, + { "enhancement_hw", "enhancement_hw", TYPE_STRING }, + { "barcode", "barcode", TYPE_STRING }, + { "esrb_rating", "esrb_rating", TYPE_STRING }, + { "elspa_rating", "elspa_rating", TYPE_STRING }, + { "pegi_rating", "pegi_rating", TYPE_STRING }, + { "cero_rating", "cero_rating", TYPE_STRING }, + { 
"franchise", "franchise", TYPE_STRING }, + { "developer", "developer", TYPE_STRING }, + { "publisher", "publisher", TYPE_STRING }, + { "origin", "origin", TYPE_STRING }, + { "rom.crc", "crc", TYPE_HEXA }, + { "rom.md5", "md5", TYPE_HEXA }, + { "rom.sha1", "sha1", TYPE_HEXA }, + { "serial", "serial", TYPE_RAW }, + { "rom.serial", "serial", TYPE_RAW }, + }; + + unsigned i, j, index, fields; + pr_node_t** game_ptr = (pr_node_t**)ctx; + pr_node_t* game = *game_ptr; + + if ( game == NULL ) + { + return 1; + } + + *game_ptr = game->next; + + out->type = RDT_MAP; + out->val.map.items = calloc( game->count, sizeof( struct rmsgpack_dom_pair ) ); + + index = fields = 0; + + for ( i = 0; i < game->count; i++ ) + { + out->val.map.items[ i ].key.type = RDT_STRING; + + for ( j = 0; j < sizeof( map ) / sizeof( map[ 0 ] ); j++ ) + { + if ( ( fields & ( 1 << j ) ) == 0 && cmpkeystr( &game->pairs[ i ].key, map[ j ].key ) ) + { + fields |= 1 << j; /* avoid overrides */ + + out->val.map.items[ index ].key.val.string.len = strlen( map[ j ].new_key ); + out->val.map.items[ index ].key.val.string.buff = strdup( map[ j ].new_key ); + + switch ( map[ j ].type ) + { + case TYPE_STRING: + out->val.map.items[ index ].value.type = RDT_STRING; + out->val.map.items[ index ].value.val.string.len = game->pairs[ i ].value.len; + out->val.map.items[ index ].value.val.string.buff = dup_string( &game->pairs[ i ].value ); + index++; + break; + + case TYPE_HEXA: + out->val.map.items[ index ].value.type = RDT_BINARY; + out->val.map.items[ index ].value.val.binary.len = game->pairs[ i ].value.len / 2; + out->val.map.items[ index ].value.val.binary.buff = dup_hexa( &game->pairs[ i ].value ); + index++; + break; + + case TYPE_UINT: + out->val.map.items[ index ].value.type = RDT_UINT; + out->val.map.items[ index ].value.val.uint_ = strtol( game->pairs[ i ].value.str, NULL, 10 ); + index++; + break; + + case TYPE_RAW: + out->val.map.items[ index ].value.type = RDT_BINARY; + out->val.map.items[ index 
].value.val.binary.len = game->pairs[ i ].value.len; + out->val.map.items[ index ].value.val.binary.buff = dup_memory( &game->pairs[ i ].value ); + index++; + break; + + default: + fprintf( stderr, "Unhandled type in mapping %u (%s => %s)\n", j, map[ j ].key, map[ j ].new_key ); + break; + } + + break; + } + } + } + + out->val.map.len = index; + return 0; +} + +static const char* read_file( const char* filename, unsigned* size ) +{ + char* source; + RFILE* file = retro_fopen( filename, RFILE_MODE_READ, -1 ); + + if ( file == NULL ) + { + fprintf( stderr, "Error opening file: %s\n", strerror( errno ) ); + return NULL; + } + + retro_fseek( file, 0, SEEK_END ); + *size = retro_ftell( file ); + retro_fseek( file, 0, SEEK_SET ); + + source = (char*)malloc( *size + 1 ); + + if ( source == NULL ) + { + retro_fclose( file ); + fprintf( stderr, "Out of memory\n" ); + return NULL; + } + + retro_fread( file, (void*)source, *size ); + retro_fclose( file ); + source[ *size ] = 0; + + return source; +} + +static int parse( pr_state_t* parser, const char* filename, const char* source, unsigned size, const char* key ) +{ + pr_node_t** prev; + pr_node_t* game; + int i, found; + + printf( "Parsing dat file '%s'...\n", filename ); + + switch ( pr_parse( parser ) ) + { + case PR_UNTERMINATED_STRING: fprintf( stderr, "%s:%u: Unterminated string\n", filename, parser->lexer.line ); return -1; + case PR_INVALID_CHARACTER: fprintf( stderr, "%s:%u: Invalid character '%s'\n", filename, parser->lexer.line, printchar( parser ) ); return -1; + case PR_UNEXPECTED_TOKEN: fprintf( stderr, "%s:%u: Unexpected token \"%s\"\n", filename, parser->lexer.line, printtoken( parser ) ); return -1; + case PR_OUT_OF_MEMORY: fprintf( stderr, "%s:%u: Out of memory\n", filename, parser->lexer.line ); return -1; + } + + prev = &parser->first; + game = parser->first; + + while ( game ) + { + found = 0; + + for ( i = 0; i < game->count; i++ ) + { + if ( cmpkeystr( &game->pairs[ i ].key, key ) ) + { + found = 1; + 
break; + } + } + + if ( found ) + { + prev = &game->next; + } + else + { + *prev = game->next; + } + + game = game->next; + } + + return 0; +} + +int main( int argc, const char* argv[] ) +{ + const char* source; + unsigned size; + pr_state_t parser; + pr_node_t* game; + RFILE* out; + int i, res = 1; + + if ( argc < 4 ) + { + fprintf( stderr, "usage:\nplain_converter \n\n" ); + return 1; + } + + source = read_file( argv[ 3 ], &size ); + pr_new( &parser, source, size ); + + if ( source && parse( &parser, argv[ 3 ], source, size, argv[ 2 ] ) == 0 ) + { + game = parser.first; + + for ( i = 4; i < argc; i++ ) + { + source = read_file( argv[ i ], &size ); + pr_new( &parser, source, size ); + + if ( source && parse( &parser, argv[ i ], source, size, argv[ 2 ] ) == 0 ) + { + merge( game, parser.first, argv[ 2 ] ); + } + } + + out = retro_fopen( argv[ 1 ], RFILE_MODE_WRITE, -1 ); + + if ( out != NULL ) + { + res = libretrodb_create( out, &provider, (void*)&game ); + retro_fclose( out ); + res = 0; + } + } + + /* HACK Don't free anything, let the OS take care of that */ + return res; +} +