1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
(defun wr/org-hugo-slug (str &optional allow-double-hyphens)
  "Convert string STR to a `slug' and return that string.

A `slug' is the part of a URL which identifies a particular page
on a website in an easy to read form.

Example: If STR is \"My First Post\", it will be converted to a
slug \"my_first_post\", which can become part of an easy to read
URL like \"https://example.com/posts/my_first_post/\".

In general, STR is a string.  But it can also be a string with
Markdown markup because STR is often a post's sub-heading (which
can contain bold, italics, link, etc markup).

This function joins words with underscores, not hyphens.  The
`slug' generated from STR follows these rules:

- Contain only lower case alphabet, number and underscore
  characters ([[:alnum:]_]).
- Not have *any* HTML tag like \"<code>..</code>\",
  \"<span class=..>..</span>\", etc.  The text enclosed by such a
  tag pair is dropped together with the tags.
- Not contain any URLs (if STR happens to be a Markdown link).
- Replace \".\" in STR with \"dot\", \"&\" with \"and\",
  \"+\" with \"plus\".
- Replace parentheses with double-underscores.  So, when
  ALLOW-DOUBLE-HYPHENS is non-nil, \"foo (bar) baz\" becomes
  \"foo__bar__baz\".
- Replace all other non-alphanumeric chars with spaces, and then
  one or more consecutive spaces with a single underscore.
- If ALLOW-DOUBLE-HYPHENS is non-nil, at most two consecutive
  underscores are allowed in the returned string, otherwise
  consecutive underscores are not returned (\"foo (bar) baz\"
  then becomes \"foo_bar_baz\").  Despite its name, this argument
  governs the underscore separator used by this function.
- No underscores allowed at the leading or trailing end of the
  slug."
  ;; `ffap-url-regexp' is defined in ffap.el; make sure it is loaded
  ;; before it is read below.
  (require 'ffap)
  (let* (;; The one and only word separator.  Every step below that
         ;; inserts, trims or collapses separators refers to this
         ;; binding so that they cannot drift apart.
         (sep "_")
         (sep-re (regexp-quote sep))
         ;; All lower-case
         (str (downcase str))
         ;; Remove "<FOO>..</FOO>" HTML tags if present.
         (str (replace-regexp-in-string
               "<\\(?1:[a-z]+\\)[^>]*>.*</\\1>" "" str))
         ;; Remove URLs if present in the string.  The ")" in the
         ;; below regexp is the closing parenthesis of a Markdown
         ;; link: [Desc](Link).
         (str (replace-regexp-in-string
               (concat "\\](" ffap-url-regexp "[^)]+)") "]" str))
         ;; Replace "&" with " and ", "." with " dot ", "+" with
         ;; " plus ".
         (str (replace-regexp-in-string
               "&" " and "
               (replace-regexp-in-string
                "\\." " dot "
                (replace-regexp-in-string
                 "\\+" " plus " str))))
         ;; Replace all characters except alphabets, numbers and
         ;; parentheses with spaces.
         (str (replace-regexp-in-string "[^[:alnum:]()]" " " str))
         ;; On emacs 24.5, multibyte punctuation characters like "："
         ;; are considered as alphanumeric characters!  Below evals to
         ;; non-nil on emacs 24.5:
         ;;   (string-match-p "[[:alnum:]]+" "：")
         ;; So replace them with space manually.  The literal must be
         ;; the full-width colon: an ASCII ":" has already been turned
         ;; into a space by the previous step.
         (str (if (version< emacs-version "25.0")
                  (let ((multibyte-punctuations-str "：")) ;String of multibyte punctuation chars
                    (replace-regexp-in-string
                     (format "[%s]" multibyte-punctuations-str) " " str))
                str))
         ;; Remove leading and trailing whitespace.
         (str (replace-regexp-in-string
               "\\(^[[:space:]]*\\|[[:space:]]*$\\)" "" str))
         ;; Replace 2 or more spaces with a single space.
         (str (replace-regexp-in-string "[[:space:]]\\{2,\\}" " " str))
         ;; Replace parentheses with double separators: each
         ;; parenthesis becomes " SEP", and the space turns into a
         ;; second SEP further below.
         (str (replace-regexp-in-string
               "\\s-*([[:space:]]*\\([^)]+?\\)[[:space:]]*)\\s-*"
               (concat " " sep "\\1" sep " ") str))
         ;; Remove any remaining parentheses character.
         (str (replace-regexp-in-string "[()]" "" str))
         ;; Replace spaces with the separator.
         (str (replace-regexp-in-string " " sep str))
         ;; Remove leading and trailing separators.  The string has no
         ;; newlines at this point, so anchoring at the string ends is
         ;; sufficient.
         (str (replace-regexp-in-string
               (concat "\\`" sep-re "+\\|" sep-re "+\\'") "" str)))
    ;; Cap the length of separator runs: at most two in a row when
    ;; ALLOW-DOUBLE-HYPHENS is non-nil, otherwise exactly one.  Whole
    ;; runs are matched so that long runs (e.g. from "(a)(b)") are
    ;; collapsed completely in a single pass.
    (if allow-double-hyphens
        (replace-regexp-in-string
         (concat sep-re "\\{3,\\}") (concat sep sep) str)
      (replace-regexp-in-string
       (concat sep-re "\\{2,\\}") sep str))))
|