From: Mattias Engdegård Date: Sat, 10 Feb 2024 20:14:09 +0000 (+0100) Subject: Add a proper type for obarrays X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=54978c3a632ae8732fa02e01e2fe1bff078e7067;p=emacs.git Add a proper type for obarrays The new opaque type replaces the previous use of vectors for obarrays. `obarray-make` now returns objects of this type. Functions that take obarrays continue to accept vectors for compatibility, now just using their first slot to store an actual obarray object. obarray-size and obarray-default-size now obsolete. * lisp/obarray.el (obarray-default-size, obarray-size): Declare obsolete. (obarray-make, obarrayp, obarray-clear): Remove from here. * src/fns.c (reduce_emacs_uint_to_hash_hash): Remove from here. * src/lisp.h (struct Lisp_Obarray, OBARRAYP, XOBARRAY, CHECK_OBARRAY) (make_lisp_obarray, obarray_size, check_obarray) (obarray_iter_t, make_obarray_iter, obarray_iter_at_end) (obarray_iter_step, obarray_iter_symbol, DOOBARRAY, knuth_hash): New. (reduce_emacs_uint_to_hash_hash): Moved here. * src/lread.c (check_obarray): Renamed and reworked as... (checked_obarray_slow): ...this. (intern_sym, Funintern, oblookup, map_obarray) (Finternal__obarray_buckets): Adapt to new type. (obarray_index, allocate_obarray, make_obarray, grow_obarray) (obarray_default_bits, Fobarray_make, Fobarrayp, Fobarray_clear): New. * etc/emacs_lldb.py (Lisp_Object): * lisp/emacs-lisp/cl-macs.el (`(,type . ,pred)): * lisp/emacs-lisp/cl-preloaded.el (cl--typeof-types): * lisp/emacs-lisp/comp-common.el (comp-known-type-specifiers): * lisp/emacs-lisp/comp.el (comp-known-predicates): * src/alloc.c (cleanup_vector, process_mark_stack): * src/data.c (Ftype_of, syms_of_data): * src/minibuf.c (Ftry_completion, Fall_completions, Ftest_completion): * src/pdumper.c (dump_obarray_buckets, dump_obarray, dump_vectorlike): * src/print.c (print_vectorlike_unreadable): * test/lisp/abbrev-tests.el (abbrev-make-abbrev-table-test): * test/lisp/obarray-tests.el (obarrayp-test) (obarrayp-unchecked-content-test, obarray-make-default-test) (obarray-make-with-size-test): Adapt to new type. (cherry picked from commit 462d8ba813e07a25b71f5c1b38810a29e21f784c) --- diff --git a/etc/emacs_lldb.py b/etc/emacs_lldb.py index fdf4314e2d0..9865fe391a2 100644 --- a/etc/emacs_lldb.py +++ b/etc/emacs_lldb.py @@ -56,6 +56,7 @@ class Lisp_Object: "PVEC_BOOL_VECTOR": "struct Lisp_Bool_Vector", "PVEC_BUFFER": "struct buffer", "PVEC_HASH_TABLE": "struct Lisp_Hash_Table", + "PVEC_OBARRAY": "struct Lisp_Obarray", "PVEC_TERMINAL": "struct terminal", "PVEC_WINDOW_CONFIGURATION": "struct save_window_data", "PVEC_SUBR": "struct Lisp_Subr", diff --git a/lisp/emacs-lisp/cl-macs.el b/lisp/emacs-lisp/cl-macs.el index 44ebadeebff..ddc9775bcce 100644 --- a/lisp/emacs-lisp/cl-macs.el +++ b/lisp/emacs-lisp/cl-macs.el @@ -3488,6 +3488,7 @@ Of course, we really can't know that for sure, so it's just a heuristic." (natnum . natnump) (number . numberp) (null . null) + (obarray . obarrayp) (overlay . overlayp) (process . processp) (real . numberp) diff --git a/lisp/emacs-lisp/cl-preloaded.el b/lisp/emacs-lisp/cl-preloaded.el index d533eea9e73..840219c2260 100644 --- a/lisp/emacs-lisp/cl-preloaded.el +++ b/lisp/emacs-lisp/cl-preloaded.el @@ -73,7 +73,7 @@ (module-function function atom) (buffer atom) (char-table array sequence atom) (bool-vector array sequence atom) - (frame atom) (hash-table atom) (terminal atom) + (frame atom) (hash-table atom) (terminal atom) (obarray atom) (thread atom) (mutex atom) (condvar atom) (font-spec atom) (font-entity atom) (font-object atom) (vector array sequence atom) diff --git a/lisp/emacs-lisp/comp-common.el b/lisp/emacs-lisp/comp-common.el index ca21ed05bb4..221f819e474 100644 --- a/lisp/emacs-lisp/comp-common.el +++ b/lisp/emacs-lisp/comp-common.el @@ -240,7 +240,8 @@ Used to modify the compiler environment." (integer-or-marker-p (function (t) boolean)) (integerp (function (t) boolean)) (interactive-p (function () boolean)) - (intern-soft (function ((or string symbol) &optional vector) symbol)) + (intern-soft (function ((or string symbol) &optional (or obarray vector)) + symbol)) (invocation-directory (function () string)) (invocation-name (function () string)) (isnan (function (float) boolean)) diff --git a/lisp/emacs-lisp/comp.el b/lisp/emacs-lisp/comp.el index e0da01bcc5d..ae964b041d0 100644 --- a/lisp/emacs-lisp/comp.el +++ b/lisp/emacs-lisp/comp.el @@ -214,6 +214,7 @@ Useful to hook into pass checkers.") (number-or-marker-p . number-or-marker) (numberp . number) (numberp . number) + (obarrayp . obarray) (overlayp . overlay) (processp . process) (sequencep . sequence) diff --git a/lisp/emacs-lisp/shortdoc.el b/lisp/emacs-lisp/shortdoc.el index cde28985cd0..cbb5618ffce 100644 --- a/lisp/emacs-lisp/shortdoc.el +++ b/lisp/emacs-lisp/shortdoc.el @@ -747,9 +747,13 @@ A FUNC form can have any number of `:no-eval' (or `:no-value'), (intern :eval (intern "abc")) (intern-soft + :eval (intern-soft "list") :eval (intern-soft "Phooey!")) (make-symbol :eval (make-symbol "abc")) + (gensym + :no-eval (gensym) + :eg-result g37) "Comparing symbols" (eq :eval (eq 'abc 'abc) @@ -760,7 +764,20 @@ A FUNC form can have any number of `:no-eval' (or `:no-value'), :eval (equal 'abc 'abc)) "Name" (symbol-name - :eval (symbol-name 'abc))) + :eval (symbol-name 'abc)) + "Obarrays" + (obarray-make + :eval (obarray-make)) + (obarrayp + :eval (obarrayp (obarray-make)) + :eval (obarrayp nil)) + (unintern + :no-eval (unintern "abc" my-obarray) + :eg-result t) + (mapatoms + :no-eval (mapatoms (lambda (symbol) (print symbol)) my-obarray)) + (obarray-clear + :no-eval (obarray-clear my-obarray))) (define-short-documentation-group comparison "General-purpose" diff --git a/lisp/obarray.el b/lisp/obarray.el index e1ebb2ade51..e6e51c1382a 100644 --- a/lisp/obarray.el +++ b/lisp/obarray.el @@ -27,24 +27,12 @@ ;;; Code: -(defconst obarray-default-size 59 - "The value 59 is an arbitrary prime number that gives a good hash.") +(defconst obarray-default-size 4) +(make-obsolete-variable 'obarray-default-size + "obarrays now grow automatically" "30.1") -(defun obarray-make (&optional size) - "Return a new obarray of size SIZE or `obarray-default-size'." - (let ((size (or size obarray-default-size))) - (if (< 0 size) - (make-vector size 0) - (signal 'wrong-type-argument '(size 0))))) - -(defun obarray-size (ob) - "Return the number of slots of obarray OB." - (length ob)) - -(defun obarrayp (object) - "Return t if OBJECT is an obarray." - (and (vectorp object) - (< 0 (length object)))) +(defun obarray-size (_ob) obarray-default-size) +(make-obsolete 'obarray-size "obarrays now grow automatically" "30.1") ;; Don’t use obarray as a variable name to avoid shadowing. (defun obarray-get (ob name) @@ -66,10 +54,5 @@ Return t on success, nil otherwise." "Call function FN on every symbol in obarray OB and return nil." (mapatoms fn ob)) -(defun obarray-clear (ob) - "Remove all symbols from obarray OB." - ;; FIXME: This doesn't change the symbols to uninterned status. - (fillarray ob 0)) - (provide 'obarray) ;;; obarray.el ends here diff --git a/src/alloc.c b/src/alloc.c index 8c94c7eb33c..2ffd2415447 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -360,13 +360,13 @@ static struct gcstat object_ct total_intervals, total_free_intervals; object_ct total_buffers; - /* Size of the ancillary arrays of live hash-table objects. + /* Size of the ancillary arrays of live hash-table and obarray objects. The objects themselves are not included (counted as vectors above). */ byte_ct total_hash_table_bytes; } gcstat; -/* Total size of ancillary arrays of all allocated hash-table objects, - both dead and alive. This number is always kept up-to-date. */ +/* Total size of ancillary arrays of all allocated hash-table and obarray + objects, both dead and alive. This number is always kept up-to-date. */ static ptrdiff_t hash_table_allocated_bytes = 0; /* Points to memory space allocated as "spare", to be freed if we run @@ -3455,6 +3455,15 @@ cleanup_vector (struct Lisp_Vector *vector) hash_table_allocated_bytes -= bytes; } } + break; + case PVEC_OBARRAY: + { + struct Lisp_Obarray *o = PSEUDOVEC_STRUCT (vector, Lisp_Obarray); + xfree (o->buckets); + ptrdiff_t bytes = obarray_size (o) * sizeof *o->buckets; + hash_table_allocated_bytes -= bytes; + } + break; /* Keep the switch exhaustive. */ case PVEC_NORMAL_VECTOR: case PVEC_FREE: @@ -5632,7 +5641,8 @@ valid_lisp_object_p (Lisp_Object obj) return 0; } -/* Like xmalloc, but makes allocation count toward the total consing. +/* Like xmalloc, but makes allocation count toward the total consing + and hash table or obarray usage. Return NULL for a zero-sized allocation. */ void * hash_table_alloc_bytes (ptrdiff_t nbytes) @@ -7310,6 +7320,14 @@ process_mark_stack (ptrdiff_t base_sp) break; } + case PVEC_OBARRAY: + { + struct Lisp_Obarray *o = (struct Lisp_Obarray *)ptr; + set_vector_marked (ptr); + mark_stack_push_values (o->buckets, obarray_size (o)); + break; + } + case PVEC_CHAR_TABLE: case PVEC_SUB_CHAR_TABLE: mark_char_table (ptr, (enum pvec_type) pvectype); diff --git a/src/data.c b/src/data.c index f2f35fb355a..bb4cdd62d66 100644 --- a/src/data.c +++ b/src/data.c @@ -231,6 +231,7 @@ for example, (type-of 1) returns `integer'. */) case PVEC_BOOL_VECTOR: return Qbool_vector; case PVEC_FRAME: return Qframe; case PVEC_HASH_TABLE: return Qhash_table; + case PVEC_OBARRAY: return Qobarray; case PVEC_FONT: if (FONT_SPEC_P (object)) return Qfont_spec; @@ -4229,6 +4230,7 @@ syms_of_data (void) DEFSYM (Qtreesit_parser, "treesit-parser"); DEFSYM (Qtreesit_node, "treesit-node"); DEFSYM (Qtreesit_compiled_query, "treesit-compiled-query"); + DEFSYM (Qobarray, "obarray"); DEFSYM (Qdefun, "defun"); diff --git a/src/fns.c b/src/fns.c index 550545d1486..0a64e515402 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4450,16 +4450,6 @@ cmpfn_user_defined (Lisp_Object key1, Lisp_Object key2, return hash_table_user_defined_call (ARRAYELTS (args), args, h); } -/* Reduce an EMACS_UINT hash value to hash_hash_t. */ -static inline hash_hash_t -reduce_emacs_uint_to_hash_hash (EMACS_UINT x) -{ - verify (sizeof x <= 2 * sizeof (hash_hash_t)); - return (sizeof x == sizeof (hash_hash_t) - ? x - : x ^ (x >> (8 * (sizeof x - sizeof (hash_hash_t))))); -} - static EMACS_INT sxhash_eq (Lisp_Object key) { @@ -4645,16 +4635,11 @@ copy_hash_table (struct Lisp_Hash_Table *h1) return make_lisp_hash_table (h2); } - /* Compute index into the index vector from a hash value. */ static inline ptrdiff_t hash_index_index (struct Lisp_Hash_Table *h, hash_hash_t hash) { - /* Knuth multiplicative hashing, tailored for 32-bit indices - (avoiding a 64-bit multiply). */ - uint32_t alpha = 2654435769; /* 2**32/phi */ - /* Note the cast to uint64_t, to make it work for index_bits=0. */ - return (uint64_t)((uint32_t)hash * alpha) >> (32 - h->index_bits); + return knuth_hash (hash, h->index_bits); } /* Resize hash table H if it's too full. If H cannot be resized diff --git a/src/lisp.h b/src/lisp.h index b02466390f1..5fbbef80e8e 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -1032,6 +1032,7 @@ enum pvec_type PVEC_BOOL_VECTOR, PVEC_BUFFER, PVEC_HASH_TABLE, + PVEC_OBARRAY, PVEC_TERMINAL, PVEC_WINDOW_CONFIGURATION, PVEC_SUBR, @@ -2386,6 +2387,118 @@ INLINE int definition is done by lread.c's define_symbol. */ #define DEFSYM(sym, name) /* empty */ + +struct Lisp_Obarray +{ + union vectorlike_header header; + + /* Array of 2**size_bits values, each being either a (bare) symbol or + the fixnum 0. The symbols for each bucket are chained via + their s.next field. */ + Lisp_Object *buckets; + + unsigned size_bits; /* log2(size of buckets vector) */ + unsigned count; /* number of symbols in obarray */ +}; + +INLINE bool +OBARRAYP (Lisp_Object a) +{ + return PSEUDOVECTORP (a, PVEC_OBARRAY); +} + +INLINE struct Lisp_Obarray * +XOBARRAY (Lisp_Object a) +{ + eassert (OBARRAYP (a)); + return XUNTAG (a, Lisp_Vectorlike, struct Lisp_Obarray); +} + +INLINE void +CHECK_OBARRAY (Lisp_Object x) +{ + CHECK_TYPE (OBARRAYP (x), Qobarrayp, x); +} + +INLINE Lisp_Object +make_lisp_obarray (struct Lisp_Obarray *o) +{ + eassert (PSEUDOVECTOR_TYPEP (&o->header, PVEC_OBARRAY)); + return make_lisp_ptr (o, Lisp_Vectorlike); +} + +INLINE ptrdiff_t +obarray_size (const struct Lisp_Obarray *o) +{ + return (ptrdiff_t)1 << o->size_bits; +} + +Lisp_Object check_obarray_slow (Lisp_Object); + +/* Return an obarray object from OBARRAY or signal an error. */ +INLINE Lisp_Object +check_obarray (Lisp_Object obarray) +{ + return OBARRAYP (obarray) ? obarray : check_obarray_slow (obarray); +} + +/* Obarray iterator state. Don't access these members directly. + The iterator functions must be called in the order followed by DOOBARRAY. */ +typedef struct { + struct Lisp_Obarray *o; + ptrdiff_t idx; /* Current bucket index. */ + struct Lisp_Symbol *symbol; /* Current symbol, or NULL if at end + of current bucket. */ +} obarray_iter_t; + +INLINE obarray_iter_t +make_obarray_iter (struct Lisp_Obarray *oa) +{ + return (obarray_iter_t){.o = oa, .idx = -1, .symbol = NULL}; +} + +/* Whether IT has reached the end and there are no more symbols. + If true, IT is dead and cannot be used any more. */ +INLINE bool +obarray_iter_at_end (obarray_iter_t *it) +{ + if (it->symbol) + return false; + ptrdiff_t size = obarray_size (it->o); + while (++it->idx < size) + { + Lisp_Object obj = it->o->buckets[it->idx]; + if (!BASE_EQ (obj, make_fixnum (0))) + { + it->symbol = XBARE_SYMBOL (obj); + return false; + } + } + return true; +} + +/* Advance IT to the next symbol if any. */ +INLINE void +obarray_iter_step (obarray_iter_t *it) +{ + it->symbol = it->symbol->u.s.next; +} + +/* The Lisp symbol at IT, if obarray_iter_at_end returned false. */ +INLINE Lisp_Object +obarray_iter_symbol (obarray_iter_t *it) +{ + return make_lisp_symbol (it->symbol); +} + +/* Iterate IT over the symbols of the obarray OA. + The body shouldn't add or remove symbols in OA, but disobeying that rule + only risks symbols to be iterated more than once or not at all, + not crashes or data corruption. */ +#define DOOBARRAY(oa, it) \ + for (obarray_iter_t it = make_obarray_iter (oa); \ + !obarray_iter_at_end (&it); obarray_iter_step (&it)) + /*********************************************************************** Hash Tables @@ -2666,6 +2779,28 @@ SXHASH_REDUCE (EMACS_UINT x) return (x ^ x >> (EMACS_INT_WIDTH - FIXNUM_BITS)) & INTMASK; } +/* Reduce an EMACS_UINT hash value to hash_hash_t. */ +INLINE hash_hash_t +reduce_emacs_uint_to_hash_hash (EMACS_UINT x) +{ + verify (sizeof x <= 2 * sizeof (hash_hash_t)); + return (sizeof x == sizeof (hash_hash_t) + ? x + : x ^ (x >> (8 * (sizeof x - sizeof (hash_hash_t))))); +} + +/* Reduce HASH to a value BITS wide. */ +INLINE ptrdiff_t +knuth_hash (hash_hash_t hash, unsigned bits) +{ + /* Knuth multiplicative hashing, tailored for 32-bit indices + (avoiding a 64-bit multiply). */ + uint32_t alpha = 2654435769; /* 2**32/phi */ + /* Note the cast to uint64_t, to make it work for bits=0. */ + return (uint64_t)((uint32_t)hash * alpha) >> (32 - bits); +} + + struct Lisp_Marker { union vectorlike_header header; @@ -4585,7 +4720,6 @@ extern ptrdiff_t evxprintf (char **, ptrdiff_t *, char *, ptrdiff_t, ATTRIBUTE_FORMAT_PRINTF (5, 0); /* Defined in lread.c. */ -extern Lisp_Object check_obarray (Lisp_Object); extern Lisp_Object intern_1 (const char *, ptrdiff_t); extern Lisp_Object intern_c_string_1 (const char *, ptrdiff_t); extern Lisp_Object intern_driver (Lisp_Object, Lisp_Object, Lisp_Object); diff --git a/src/lread.c b/src/lread.c index c11c641440d..c4a34c5d73f 100644 --- a/src/lread.c +++ b/src/lread.c @@ -4886,30 +4886,43 @@ static Lisp_Object initial_obarray; static size_t oblookup_last_bucket_number; -/* Get an error if OBARRAY is not an obarray. - If it is one, return it. */ +static Lisp_Object make_obarray (unsigned bits); +/* Slow path obarray check: return the obarray to use or signal an error. */ Lisp_Object -check_obarray (Lisp_Object obarray) +check_obarray_slow (Lisp_Object obarray) { - /* We don't want to signal a wrong-type-argument error when we are - shutting down due to a fatal error, and we don't want to hit - assertions in VECTORP and ASIZE if the fatal error was during GC. */ - if (!fatal_error_in_progress - && (!VECTORP (obarray) || ASIZE (obarray) == 0)) + /* For compatibility, we accept vectors whose first element is 0, + and store an obarray object there. */ + if (VECTORP (obarray) && ASIZE (obarray) > 0) { - /* If Vobarray is now invalid, force it to be valid. */ - if (EQ (Vobarray, obarray)) Vobarray = initial_obarray; - wrong_type_argument (Qvectorp, obarray); + Lisp_Object obj = AREF (obarray, 0); + if (OBARRAYP (obj)) + return obj; + if (BASE_EQ (obj, make_fixnum (0))) + { + /* Put an actual obarray object in the first slot. + The rest of the vector remains unused. */ + obj = make_obarray (0); + ASET (obarray, 0, obj); + return obj; + } } - return obarray; + /* Reset Vobarray to the standard obarray for nicer error handling. */ + if (BASE_EQ (Vobarray, obarray)) Vobarray = initial_obarray; + + wrong_type_argument (Qobarrayp, obarray); } +static void grow_obarray (struct Lisp_Obarray *o); + /* Intern symbol SYM in OBARRAY using bucket INDEX. */ +/* FIXME: retype arguments as pure C types */ static Lisp_Object intern_sym (Lisp_Object sym, Lisp_Object obarray, Lisp_Object index) { + eassert (BARE_SYMBOL_P (sym) && OBARRAYP (obarray) && FIXNUMP (index)); struct Lisp_Symbol *s = XBARE_SYMBOL (sym); s->u.s.interned = (BASE_EQ (obarray, initial_obarray) ? SYMBOL_INTERNED_IN_INITIAL_OBARRAY @@ -4925,9 +4938,13 @@ intern_sym (Lisp_Object sym, Lisp_Object obarray, Lisp_Object index) SET_SYMBOL_VAL (s, sym); } - Lisp_Object *ptr = aref_addr (obarray, XFIXNUM (index)); + struct Lisp_Obarray *o = XOBARRAY (obarray); + Lisp_Object *ptr = o->buckets + XFIXNUM (index); s->u.s.next = BARE_SYMBOL_P (*ptr) ? XBARE_SYMBOL (*ptr) : NULL; *ptr = sym; + o->count++; + if (o->count > obarray_size (o)) + grow_obarray (o); return sym; } @@ -5082,7 +5099,6 @@ usage: (unintern NAME OBARRAY) */) { register Lisp_Object tem; Lisp_Object string; - size_t hash; if (NILP (obarray)) obarray = Vobarray; obarray = check_obarray (obarray); @@ -5122,41 +5138,42 @@ usage: (unintern NAME OBARRAY) */) /* if (NILP (tem) || EQ (tem, Qt)) error ("Attempt to unintern t or nil"); */ - XBARE_SYMBOL (tem)->u.s.interned = SYMBOL_UNINTERNED; + struct Lisp_Symbol *sym = XBARE_SYMBOL (tem); + sym->u.s.interned = SYMBOL_UNINTERNED; - hash = oblookup_last_bucket_number; + ptrdiff_t idx = oblookup_last_bucket_number; + Lisp_Object *loc = &XOBARRAY (obarray)->buckets[idx]; - if (BASE_EQ (AREF (obarray, hash), tem)) - { - if (XBARE_SYMBOL (tem)->u.s.next) - { - Lisp_Object sym; - XSETSYMBOL (sym, XBARE_SYMBOL (tem)->u.s.next); - ASET (obarray, hash, sym); - } - else - ASET (obarray, hash, make_fixnum (0)); - } + eassert (BARE_SYMBOL_P (*loc)); + struct Lisp_Symbol *prev = XBARE_SYMBOL (*loc); + if (sym == prev) + *loc = sym->u.s.next ? make_lisp_symbol (sym->u.s.next) : make_fixnum (0); else - { - Lisp_Object tail, following; + while (1) + { + struct Lisp_Symbol *next = prev->u.s.next; + if (next == sym) + { + prev->u.s.next = next->u.s.next; + break; + } + prev = next; + } - for (tail = AREF (obarray, hash); - XBARE_SYMBOL (tail)->u.s.next; - tail = following) - { - XSETSYMBOL (following, XBARE_SYMBOL (tail)->u.s.next); - if (BASE_EQ (following, tem)) - { - set_symbol_next (tail, XBARE_SYMBOL (following)->u.s.next); - break; - } - } - } + XOBARRAY (obarray)->count--; return Qt; } + +/* Bucket index of the string STR of length SIZE_BYTE bytes in obarray OA. */ +static ptrdiff_t +obarray_index (struct Lisp_Obarray *oa, const char *str, ptrdiff_t size_byte) +{ + EMACS_UINT hash = hash_string (str, size_byte); + return knuth_hash (reduce_emacs_uint_to_hash_hash (hash), oa->size_bits); +} + /* Return the symbol in OBARRAY whose names matches the string of SIZE characters (SIZE_BYTE bytes) at PTR. If there is no such symbol, return the integer bucket number of @@ -5167,36 +5184,27 @@ usage: (unintern NAME OBARRAY) */) Lisp_Object oblookup (Lisp_Object obarray, register const char *ptr, ptrdiff_t size, ptrdiff_t size_byte) { - size_t hash; - size_t obsize; - register Lisp_Object tail; - Lisp_Object bucket, tem; + struct Lisp_Obarray *o = XOBARRAY (obarray); + ptrdiff_t idx = obarray_index (o, ptr, size_byte); + Lisp_Object bucket = o->buckets[idx]; - obarray = check_obarray (obarray); - /* This is sometimes needed in the middle of GC. */ - obsize = gc_asize (obarray); - hash = hash_string (ptr, size_byte) % obsize; - bucket = AREF (obarray, hash); - oblookup_last_bucket_number = hash; - if (BASE_EQ (bucket, make_fixnum (0))) - ; - else if (!BARE_SYMBOL_P (bucket)) - /* Like CADR error message. */ - xsignal2 (Qwrong_type_argument, Qobarrayp, - build_string ("Bad data in guts of obarray")); - else - for (tail = bucket; ; XSETSYMBOL (tail, XBARE_SYMBOL (tail)->u.s.next)) - { - Lisp_Object name = XBARE_SYMBOL (tail)->u.s.name; - if (SBYTES (name) == size_byte - && SCHARS (name) == size - && !memcmp (SDATA (name), ptr, size_byte)) - return tail; - else if (XBARE_SYMBOL (tail)->u.s.next == 0) - break; - } - XSETINT (tem, hash); - return tem; + oblookup_last_bucket_number = idx; + if (!BASE_EQ (bucket, make_fixnum (0))) + { + Lisp_Object sym = bucket; + while (1) + { + struct Lisp_Symbol *s = XBARE_SYMBOL (sym); + Lisp_Object name = s->u.s.name; + if (SBYTES (name) == size_byte && SCHARS (name) == size + && memcmp (SDATA (name), ptr, size_byte) == 0) + return sym; + if (s->u.s.next == NULL) + break; + sym = make_lisp_symbol(s->u.s.next); + } + } + return make_fixnum (idx); } /* Like 'oblookup', but considers 'Vread_symbol_shorthands', @@ -5263,24 +5271,134 @@ oblookup_considering_shorthand (Lisp_Object obarray, const char *in, } -void -map_obarray (Lisp_Object obarray, void (*fn) (Lisp_Object, Lisp_Object), Lisp_Object arg) +static struct Lisp_Obarray * +allocate_obarray (void) { - ptrdiff_t i; - register Lisp_Object tail; - CHECK_VECTOR (obarray); - for (i = ASIZE (obarray) - 1; i >= 0; i--) + return ALLOCATE_PLAIN_PSEUDOVECTOR (struct Lisp_Obarray, PVEC_OBARRAY); +} + +static Lisp_Object +make_obarray (unsigned bits) +{ + struct Lisp_Obarray *o = allocate_obarray (); + o->count = 0; + o->size_bits = bits; + ptrdiff_t size = (ptrdiff_t)1 << bits; + o->buckets = hash_table_alloc_bytes (size * sizeof *o->buckets); + for (ptrdiff_t i = 0; i < size; i++) + o->buckets[i] = make_fixnum (0); + return make_lisp_obarray (o); +} + +enum { + obarray_default_bits = 3, + word_size_log2 = word_size < 8 ? 5 : 6, /* good enough */ + obarray_max_bits = min (8 * sizeof (int), + 8 * sizeof (ptrdiff_t) - word_size_log2) - 1, +}; + +static void +grow_obarray (struct Lisp_Obarray *o) +{ + ptrdiff_t old_size = obarray_size (o); + eassert (o->count > old_size); + Lisp_Object *old_buckets = o->buckets; + + int new_bits = o->size_bits + 1; + if (new_bits > obarray_max_bits) + error ("Obarray too big"); + ptrdiff_t new_size = (ptrdiff_t)1 << new_bits; + o->buckets = hash_table_alloc_bytes (new_size * sizeof *o->buckets); + for (ptrdiff_t i = 0; i < new_size; i++) + o->buckets[i] = make_fixnum (0); + o->size_bits = new_bits; + + /* Rehash symbols. + FIXME: this is expensive since we need to recompute the hash for every + symbol name. Would it be reasonable to store it in the symbol? */ + for (ptrdiff_t i = 0; i < old_size; i++) { - tail = AREF (obarray, i); - if (BARE_SYMBOL_P (tail)) - while (1) - { - (*fn) (tail, arg); - if (XBARE_SYMBOL (tail)->u.s.next == 0) - break; - XSETSYMBOL (tail, XBARE_SYMBOL (tail)->u.s.next); - } + Lisp_Object obj = old_buckets[i]; + if (BARE_SYMBOL_P (obj)) + { + struct Lisp_Symbol *s = XBARE_SYMBOL (obj); + while (1) + { + Lisp_Object name = s->u.s.name; + ptrdiff_t idx = obarray_index (o, SSDATA (name), SBYTES (name)); + Lisp_Object *loc = o->buckets + idx; + struct Lisp_Symbol *next = s->u.s.next; + s->u.s.next = BARE_SYMBOL_P (*loc) ? XBARE_SYMBOL (*loc) : NULL; + *loc = make_lisp_symbol (s); + if (next == NULL) + break; + s = next; + } + } } + + hash_table_free_bytes (old_buckets, old_size * sizeof *old_buckets); +} + +DEFUN ("obarray-make", Fobarray_make, Sobarray_make, 0, 1, 0, + doc: /* Return a new obarray of size SIZE. +The obarray will grow to accommodate any number of symbols; the size, if +given, is only a hint for the expected number. */) + (Lisp_Object size) +{ + int bits; + if (NILP (size)) + bits = obarray_default_bits; + else + { + CHECK_FIXNAT (size); + EMACS_UINT n = XFIXNUM (size); + bits = elogb (n) + 1; + if (bits > obarray_max_bits) + xsignal (Qargs_out_of_range, size); + } + return make_obarray (bits); +} + +DEFUN ("obarrayp", Fobarrayp, Sobarrayp, 1, 1, 0, + doc: /* Return t iff OBJECT is an obarray. */) + (Lisp_Object object) +{ + return OBARRAYP (object) ? Qt : Qnil; +} + +DEFUN ("obarray-clear", Fobarray_clear, Sobarray_clear, 1, 1, 0, + doc: /* Remove all symbols from OBARRAY. */) + (Lisp_Object obarray) +{ + CHECK_OBARRAY (obarray); + struct Lisp_Obarray *o = XOBARRAY (obarray); + + /* This function does not bother setting the status of its contained symbols + to uninterned. It doesn't matter very much. */ + int new_bits = obarray_default_bits; + int new_size = (ptrdiff_t)1 << new_bits; + Lisp_Object *new_buckets + = hash_table_alloc_bytes (new_size * sizeof *new_buckets); + for (ptrdiff_t i = 0; i < new_size; i++) + new_buckets[i] = make_fixnum (0); + + int old_size = obarray_size (o); + hash_table_free_bytes (o->buckets, old_size * sizeof *o->buckets); + o->buckets = new_buckets; + o->size_bits = new_bits; + o->count = 0; + + return Qnil; +} + +void +map_obarray (Lisp_Object obarray, + void (*fn) (Lisp_Object, Lisp_Object), Lisp_Object arg) +{ + CHECK_OBARRAY (obarray); + DOOBARRAY (XOBARRAY (obarray), it) + (*fn) (obarray_iter_symbol (&it), arg); } static void @@ -5307,12 +5425,13 @@ DEFUN ("internal--obarray-buckets", (Lisp_Object obarray) { obarray = check_obarray (obarray); - ptrdiff_t size = ASIZE (obarray); + ptrdiff_t size = obarray_size (XOBARRAY (obarray)); + Lisp_Object ret = Qnil; for (ptrdiff_t i = 0; i < size; i++) { Lisp_Object bucket = Qnil; - Lisp_Object sym = AREF (obarray, i); + Lisp_Object sym = XOBARRAY (obarray)->buckets[i]; if (BARE_SYMBOL_P (sym)) while (1) { @@ -5332,6 +5451,7 @@ DEFUN ("internal--obarray-buckets", void init_obarray_once (void) { + /* FIXME: use PVEC_OBARRAY */ Vobarray = make_vector (OBARRAY_SIZE, make_fixnum (0)); initial_obarray = Vobarray; staticpro (&initial_obarray); @@ -5715,6 +5835,9 @@ syms_of_lread (void) defsubr (&Smapatoms); defsubr (&Slocate_file_internal); defsubr (&Sinternal__obarray_buckets); + defsubr (&Sobarray_make); + defsubr (&Sobarrayp); + defsubr (&Sobarray_clear); DEFVAR_LISP ("obarray", Vobarray, doc: /* Symbol table for use by `intern' and `read'. diff --git a/src/minibuf.c b/src/minibuf.c index 1114e7f773c..d59d74addbe 100644 --- a/src/minibuf.c +++ b/src/minibuf.c @@ -1615,13 +1615,15 @@ or from one of the possible completions. */) ptrdiff_t bestmatchsize = 0; /* These are in bytes, too. */ ptrdiff_t compare, matchsize; + if (VECTORP (collection)) + collection = check_obarray (collection); enum { function_table, list_table, obarray_table, hash_table} type = (HASH_TABLE_P (collection) ? hash_table - : VECTORP (collection) ? obarray_table + : OBARRAYP (collection) ? obarray_table : ((NILP (collection) || (CONSP (collection) && !FUNCTIONP (collection))) ? list_table : function_table)); - ptrdiff_t idx = 0, obsize = 0; + ptrdiff_t idx = 0; int matchcount = 0; Lisp_Object bucket, zero, end, tem; @@ -1634,12 +1636,9 @@ or from one of the possible completions. */) /* If COLLECTION is not a list, set TAIL just for gc pro. */ tail = collection; + obarray_iter_t obit; if (type == obarray_table) - { - collection = check_obarray (collection); - obsize = ASIZE (collection); - bucket = AREF (collection, idx); - } + obit = make_obarray_iter (XOBARRAY (collection)); while (1) { @@ -1658,24 +1657,10 @@ or from one of the possible completions. */) } else if (type == obarray_table) { - if (!EQ (bucket, zero)) - { - if (!SYMBOLP (bucket)) - error ("Bad data in guts of obarray"); - elt = bucket; - eltstring = elt; - if (XSYMBOL (bucket)->u.s.next) - XSETSYMBOL (bucket, XSYMBOL (bucket)->u.s.next); - else - XSETFASTINT (bucket, 0); - } - else if (++idx >= obsize) + if (obarray_iter_at_end (&obit)) break; - else - { - bucket = AREF (collection, idx); - continue; - } + elt = eltstring = obarray_iter_symbol (&obit); + obarray_iter_step (&obit); } else /* if (type == hash_table) */ { @@ -1858,10 +1843,12 @@ with a space are ignored unless STRING itself starts with a space. */) { Lisp_Object tail, elt, eltstring; Lisp_Object allmatches; + if (VECTORP (collection)) + collection = check_obarray (collection); int type = HASH_TABLE_P (collection) ? 3 - : VECTORP (collection) ? 2 + : OBARRAYP (collection) ? 2 : NILP (collection) || (CONSP (collection) && !FUNCTIONP (collection)); - ptrdiff_t idx = 0, obsize = 0; + ptrdiff_t idx = 0; Lisp_Object bucket, tem, zero; CHECK_STRING (string); @@ -1872,12 +1859,9 @@ with a space are ignored unless STRING itself starts with a space. */) /* If COLLECTION is not a list, set TAIL just for gc pro. */ tail = collection; + obarray_iter_t obit; if (type == 2) - { - collection = check_obarray (collection); - obsize = ASIZE (collection); - bucket = AREF (collection, idx); - } + obit = make_obarray_iter (XOBARRAY (collection)); while (1) { @@ -1896,24 +1880,10 @@ with a space are ignored unless STRING itself starts with a space. */) } else if (type == 2) { - if (!EQ (bucket, zero)) - { - if (!SYMBOLP (bucket)) - error ("Bad data in guts of obarray"); - elt = bucket; - eltstring = elt; - if (XSYMBOL (bucket)->u.s.next) - XSETSYMBOL (bucket, XSYMBOL (bucket)->u.s.next); - else - XSETFASTINT (bucket, 0); - } - else if (++idx >= obsize) + if (obarray_iter_at_end (&obit)) break; - else - { - bucket = AREF (collection, idx); - continue; - } + elt = eltstring = obarray_iter_symbol (&obit); + obarray_iter_step (&obit); } else /* if (type == 3) */ { @@ -2059,7 +2029,7 @@ If COLLECTION is a function, it is called with three arguments: the values STRING, PREDICATE and `lambda'. */) (Lisp_Object string, Lisp_Object collection, Lisp_Object predicate) { - Lisp_Object tail, tem = Qnil, arg = Qnil; + Lisp_Object tem = Qnil, arg = Qnil; CHECK_STRING (string); @@ -2069,38 +2039,30 @@ the values STRING, PREDICATE and `lambda'. */) if (NILP (tem)) return Qnil; } - else if (VECTORP (collection)) + else if (OBARRAYP (collection) || VECTORP (collection)) { + collection = check_obarray (collection); /* Bypass intern-soft as that loses for nil. */ tem = oblookup (collection, SSDATA (string), SCHARS (string), SBYTES (string)); - if (completion_ignore_case && !SYMBOLP (tem)) - { - for (ptrdiff_t i = ASIZE (collection) - 1; i >= 0; i--) - { - tail = AREF (collection, i); - if (SYMBOLP (tail)) - while (1) - { - if (BASE_EQ (Fcompare_strings (string, make_fixnum (0), - Qnil, - Fsymbol_name (tail), - make_fixnum (0) , Qnil, Qt), - Qt)) - { - tem = tail; - break; - } - if (XSYMBOL (tail)->u.s.next == 0) - break; - XSETSYMBOL (tail, XSYMBOL (tail)->u.s.next); - } - } - } + if (completion_ignore_case && !BARE_SYMBOL_P (tem)) + DOOBARRAY (XOBARRAY (collection), it) + { + Lisp_Object obj = obarray_iter_symbol (&it); + if (BASE_EQ (Fcompare_strings (string, make_fixnum (0), + Qnil, + Fsymbol_name (obj), + make_fixnum (0) , Qnil, Qt), + Qt)) + { + tem = obj; + break; + } + } - if (!SYMBOLP (tem)) + if (!BARE_SYMBOL_P (tem)) return Qnil; } else if (HASH_TABLE_P (collection)) diff --git a/src/pdumper.c b/src/pdumper.c index 778d8facabd..ca457858219 100644 --- a/src/pdumper.c +++ b/src/pdumper.c @@ -2748,6 +2748,51 @@ dump_hash_table (struct dump_context *ctx, Lisp_Object object) return offset; } +static dump_off +dump_obarray_buckets (struct dump_context *ctx, const struct Lisp_Obarray *o) +{ + dump_align_output (ctx, DUMP_ALIGNMENT); + dump_off start_offset = ctx->offset; + ptrdiff_t n = obarray_size (o); + + struct dump_flags old_flags = ctx->flags; + ctx->flags.pack_objects = true; + + for (ptrdiff_t i = 0; i < n; i++) + { + Lisp_Object out; + const Lisp_Object *slot = &o->buckets[i]; + dump_object_start (ctx, &out, sizeof out); + dump_field_lv (ctx, &out, slot, slot, WEIGHT_STRONG); + dump_object_finish (ctx, &out, sizeof out); + } + + ctx->flags = old_flags; + return start_offset; +} + +static dump_off +dump_obarray (struct dump_context *ctx, Lisp_Object object) +{ +#if CHECK_STRUCTS && !defined HASH_Lisp_Obarray_XXXXXXXXXX +# error "Lisp_Hash_Table changed. See CHECK_STRUCTS comment in config.h." +#endif + const struct Lisp_Obarray *in_oa = XOBARRAY (object); + struct Lisp_Obarray munged_oa = *in_oa; + struct Lisp_Obarray *oa = &munged_oa; + START_DUMP_PVEC (ctx, &oa->header, struct Lisp_Obarray, out); + dump_pseudovector_lisp_fields (ctx, &out->header, &oa->header); + DUMP_FIELD_COPY (out, oa, count); + DUMP_FIELD_COPY (out, oa, size_bits); + dump_field_fixup_later (ctx, out, oa, &oa->buckets); + dump_off offset = finish_dump_pvec (ctx, &out->header); + dump_remember_fixup_ptr_raw + (ctx, + offset + dump_offsetof (struct Lisp_Obarray, buckets), + dump_obarray_buckets (ctx, oa)); + return offset; +} + static dump_off dump_buffer (struct dump_context *ctx, const struct buffer *in_buffer) { @@ -3031,6 +3076,8 @@ dump_vectorlike (struct dump_context *ctx, return dump_bool_vector(ctx, v); case PVEC_HASH_TABLE: return dump_hash_table (ctx, lv); + case PVEC_OBARRAY: + return dump_obarray (ctx, lv); case PVEC_BUFFER: return dump_buffer (ctx, XBUFFER (lv)); case PVEC_SUBR: diff --git a/src/print.c b/src/print.c index e2252562915..76c577ec800 100644 --- a/src/print.c +++ b/src/print.c @@ -2078,6 +2078,16 @@ print_vectorlike_unreadable (Lisp_Object obj, Lisp_Object printcharfun, } return; + case PVEC_OBARRAY: + { + struct Lisp_Obarray *o = XOBARRAY (obj); + /* FIXME: Would it make sense to print the actual symbols (up to + a limit)? */ + int i = sprintf (buf, "#", o->count); + strout (buf, i, i, printcharfun); + return; + } + /* Types handled earlier. */ case PVEC_NORMAL_VECTOR: case PVEC_RECORD: diff --git a/test/lisp/abbrev-tests.el b/test/lisp/abbrev-tests.el index bfdfac8be1b..cdd1a7832d3 100644 --- a/test/lisp/abbrev-tests.el +++ b/test/lisp/abbrev-tests.el @@ -57,12 +57,10 @@ (ert-deftest abbrev-make-abbrev-table-test () ;; Table without properties: (let ((table (make-abbrev-table))) - (should (abbrev-table-p table)) - (should (= (length table) obarray-default-size))) + (should (abbrev-table-p table))) ;; Table with one property 'foo with value 'bar: (let ((table (make-abbrev-table '(foo bar)))) (should (abbrev-table-p table)) - (should (= (length table) obarray-default-size)) (should (eq (abbrev-table-get table 'foo) 'bar)))) (ert-deftest abbrev--table-symbols-test () diff --git a/test/lisp/obarray-tests.el b/test/lisp/obarray-tests.el index dd40d0f4d76..f9f97dba535 100644 --- a/test/lisp/obarray-tests.el +++ b/test/lisp/obarray-tests.el @@ -32,28 +32,18 @@ (should-not (obarrayp "aoeu")) (should-not (obarrayp '())) (should-not (obarrayp [])) - (should (obarrayp (obarray-make 7))) - (should (obarrayp (make-vector 7 0)))) ; for compatibility? - -(ert-deftest obarrayp-unchecked-content-test () - "Should fail to check content of passed obarray." - :expected-result :failed (should-not (obarrayp ["a" "b" "c"])) - (should-not (obarrayp [1 2 3]))) - -(ert-deftest obarray-make-default-test () - (let ((table (obarray-make))) - (should (obarrayp table)) - (should (eq (obarray-size table) obarray-default-size)))) + (should-not (obarrayp [1 2 3])) + (should-not (obarrayp (make-vector 7 0))) + (should-not (obarrayp (vector (obarray-make)))) + (should (obarrayp (obarray-make))) + (should (obarrayp (obarray-make 7)))) (ert-deftest obarray-make-with-size-test () ;; FIXME: Actually, `wrong-type-argument' is not the right error to signal, ;; so we shouldn't enforce this misbehavior in tests! (should-error (obarray-make -1) :type 'wrong-type-argument) - (should-error (obarray-make 0) :type 'wrong-type-argument) - (let ((table (obarray-make 1))) - (should (obarrayp table)) - (should (eq (obarray-size table) 1)))) + (should-error (obarray-make 'a) :type 'wrong-type-argument)) (ert-deftest obarray-get-test () (let ((table (obarray-make 3)))