Skip to content

Commit 76a92b5

Browse files
committed
Deprecate String.capitalize/2 in favor of :string.titlecase/1
When capitalize/2 was written, Erlang did not provide titlecase functions. capitalize/2 also downcases the rest of the string, whereas Erlang's :string.titlecase/1 leaves it untouched, which is a common point of contention. Given that our titlecase implementation requires 40 KB of additional code in .beam files, it makes sense to unify it with Erlang's (which we have already done for most non-critical functionality in unicode).
1 parent dced276 commit 76a92b5

File tree

5 files changed

+38
-105
lines changed

5 files changed

+38
-105
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
* [Date] Deprecate inferring a range with negative step, call `Date.range/3` with a negative step instead
2323
* [Enum] Deprecate passing a range with negative step on `Enum.slice/2`, give `first..last//1` instead
2424
* [String] Deprecate passing a range with negative step on `String.slice/2`, give `first..last//1` instead
25+
* [String] Deprecate `String.capitalize/2` in favor of Erlang's `:string.titlecase/1`
2526

2627
#### ExUnit
2728

lib/elixir/lib/string.ex

Lines changed: 9 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -823,6 +823,7 @@ defmodule String do
823823
iex> String.upcase("ıi", :turkic)
824824
"Iİ"
825825
826+
Also see `String.downcase/2` and Erlang's `:string.titlecase/1` for other conversions.
826827
"""
827828
@spec upcase(t, :default | :ascii | :greek | :turkic) :: t
828829
def upcase(string, mode \\ :default)
@@ -857,6 +858,8 @@ defmodule String do
857858
lowercases only the letters A to Z. `:greek` includes the context sensitive
858859
mappings found in Greek. `:turkic` properly handles the letter i with the dotless variant.
859860
861+
Also see `String.upcase/2` and Erlang's `:string.titlecase/1` for other conversions.
862+
860863
## Examples
861864
862865
iex> String.downcase("ABCD")
@@ -917,28 +920,8 @@ defmodule String do
917920
defp downcase_ascii(<<char, rest::bits>>), do: [char | downcase_ascii(rest)]
918921
defp downcase_ascii(<<>>), do: []
919922

920-
@doc """
921-
Converts the first character in the given string to
922-
uppercase and the remainder to lowercase according to `mode`.
923-
924-
`mode` may be `:default`, `:ascii`, `:greek` or `:turkic`. The `:default` mode considers
925-
all non-conditional transformations outlined in the Unicode standard. `:ascii`
926-
capitalizes only the letters A to Z. `:greek` includes the context sensitive
927-
mappings found in Greek. `:turkic` properly handles the letter i with the dotless variant.
928-
929-
## Examples
930-
931-
iex> String.capitalize("abcd")
932-
"Abcd"
933-
934-
iex> String.capitalize("fin")
935-
"Fin"
936-
937-
iex> String.capitalize("olá")
938-
"Olá"
939-
940-
"""
941-
@spec capitalize(t, :default | :ascii | :greek | :turkic) :: t
923+
@doc false
924+
@deprecated "Use :string.titlecase instead"
942925
def capitalize(string, mode \\ :default)
943926

944927
def capitalize(<<char, rest::binary>>, :ascii) do
@@ -947,8 +930,10 @@ defmodule String do
947930
end
948931

949932
def capitalize(string, mode) when is_binary(string) do
950-
{char, rest} = String.Unicode.titlecase_once(string, mode)
951-
char <> downcase(rest, mode)
933+
case next_grapheme(string) do
934+
{left, right} -> :string.titlecase(left) <> downcase(right, mode)
935+
nil -> string
936+
end
952937
end
953938

954939
@doc false

lib/elixir/pages/references/compatibility-and-deprecations.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ The first column is the version the feature was hard deprecated. The second colu
8080

8181
Version | Deprecated feature | Replaced by (available since)
8282
:-------| :-------------------------------------------------- | :---------------------------------------------------------------
83+
[v1.16] | Ranges with negative steps in `Enum.slice/2` | Explicit steps in ranges (v1.11)
84+
[v1.16] | Ranges with negative steps in `String.slice/2` | Explicit steps in ranges (v1.11)
85+
[v1.16] | `String.capitalize/2` | `:string.titlecase/1` (v1.8)
8386
[v1.15] | `Calendar.ISO.day_of_week/3` | `Calendar.ISO.day_of_week/4` (v1.11)
8487
[v1.15] | `Exception.exception?/1` | `Kernel.is_exception/1` (v1.11)
8588
[v1.15] | `Regex.regex?/1` | `Kernel.is_struct/2` (`Kernel.is_struct(term, Regex)`) (v1.11)
@@ -204,3 +207,4 @@ Version | Deprecated feature | Replaced by (ava
204207
[v1.13]: https://github.com/elixir-lang/elixir/blob/v1.13/CHANGELOG.md#4-hard-deprecations
205208
[v1.14]: https://github.com/elixir-lang/elixir/blob/v1.14/CHANGELOG.md#4-hard-deprecations
206209
[v1.15]: https://github.com/elixir-lang/elixir/blob/v1.15/CHANGELOG.md#4-hard-deprecations
210+
[v1.16]: https://github.com/elixir-lang/elixir/blob/main/CHANGELOG.md#4-hard-deprecations

lib/elixir/test/elixir/string_test.exs

Lines changed: 17 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -253,35 +253,23 @@ defmodule StringTest do
253253
end
254254

255255
test "capitalize/1" do
256-
assert String.capitalize("") == ""
257-
assert String.capitalize("abc") == "Abc"
258-
assert String.capitalize("ABC") == "Abc"
259-
assert String.capitalize("c b a") == "C b a"
260-
assert String.capitalize("1ABC") == "1abc"
261-
assert String.capitalize("_aBc1") == "_abc1"
262-
assert String.capitalize(" aBc1") == " abc1"
263-
end
264-
265-
test "capitalize/1 with UTF-8" do
266-
assert String.capitalize("àáâ") == "Àáâ"
267-
assert String.capitalize("ÀÁÂ") == "Àáâ"
268-
assert String.capitalize("âáà") == "Âáà"
269-
assert String.capitalize("ÂÁÀ") == "Âáà"
270-
assert String.capitalize("òóôõö") == "Òóôõö"
271-
assert String.capitalize("ÒÓÔÕÖ") == "Òóôõö"
272-
assert String.capitalize("fin") == "Fin"
273-
end
274-
275-
test "capitalize/1 with ascii" do
276-
assert String.capitalize("àáâ", :ascii) == "àáâ"
277-
assert String.capitalize("aáA", :ascii) == "Aáa"
278-
end
279-
280-
test "capitalize/1 with turkic" do
281-
assert String.capitalize("iii", :turkic) == "İii"
282-
assert String.capitalize("ııı", :turkic) == "Iıı"
283-
assert String.capitalize("İii", :turkic) == "İii"
284-
assert String.capitalize("Iıı", :turkic) == "Iıı"
256+
mod = String
257+
assert mod.capitalize("") == ""
258+
assert mod.capitalize("abc") == "Abc"
259+
assert mod.capitalize("ABC") == "Abc"
260+
assert mod.capitalize("c b a") == "C b a"
261+
assert mod.capitalize("1ABC") == "1abc"
262+
assert mod.capitalize("_aBc1") == "_abc1"
263+
assert mod.capitalize(" aBc1") == " abc1"
264+
assert mod.capitalize("àáâ") == "Àáâ"
265+
assert mod.capitalize("ÀÁÂ") == "Àáâ"
266+
assert mod.capitalize("âáà") == "Âáà"
267+
assert mod.capitalize("ÂÁÀ") == "Âáà"
268+
assert mod.capitalize("òóôõö") == "Òóôõö"
269+
assert mod.capitalize("ÒÓÔÕÖ") == "Òóôõö"
270+
assert mod.capitalize("fin") == "Fin"
271+
assert mod.capitalize("àáâ", :ascii) == "àáâ"
272+
assert mod.capitalize("aáA", :ascii) == "Aáa"
285273
end
286274

287275
test "replace_leading/3" do

lib/elixir/unicode/unicode.ex

Lines changed: 7 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -93,12 +93,12 @@ case_ignorable_categories = :binary.compile_pattern(["Mn", "Me", "Cf", "Lm", "Sk
9393
_iso,
9494
upper,
9595
lower,
96-
title
96+
_title
9797
] = :binary.split(line, ";", [:global])
9898

9999
cacc =
100-
if upper != "" or lower != "" or title != "" do
101-
[{to_binary.(codepoint), to_binary.(upper), to_binary.(lower), to_binary.(title)} | cacc]
100+
if upper != "" or lower != "" do
101+
[{to_binary.(codepoint), to_binary.(upper), to_binary.(lower)} | cacc]
102102
else
103103
cacc
104104
end
@@ -168,14 +168,14 @@ defmodule String.Unicode do
168168
acc
169169

170170
line, acc ->
171-
[codepoint, lower, title, upper, _] = :binary.split(line, "; ", [:global])
171+
[codepoint, lower, _title, upper, _] = :binary.split(line, "; ", [:global])
172172
key = to_binary.(codepoint)
173173

174174
:lists.keystore(
175175
key,
176176
1,
177177
acc,
178-
{key, to_binary.(upper), to_binary.(lower), to_binary.(title)}
178+
{key, to_binary.(upper), to_binary.(lower)}
179179
)
180180
end)
181181

@@ -265,7 +265,7 @@ defmodule String.Unicode do
265265

266266
{singles, tables} =
267267
compute_lookup.(
268-
for {codepoint, _upper, lower, _title} <- codes,
268+
for {codepoint, _upper, lower} <- codes,
269269
lower && lower != codepoint,
270270
codepoint not in conditional_downcase,
271271
do: {codepoint, lower}
@@ -352,7 +352,7 @@ defmodule String.Unicode do
352352

353353
{singles, tables} =
354354
compute_lookup.(
355-
for {codepoint, upper, _lower, _title} <- codes,
355+
for {codepoint, upper, _lower} <- codes,
356356
upper && upper != codepoint,
357357
codepoint not in conditional_upcase,
358358
do: {codepoint, upper}
@@ -380,51 +380,6 @@ defmodule String.Unicode do
380380
end
381381

382382
def upcase("", acc, _mode), do: IO.iodata_to_binary(:lists.reverse(acc))
383-
384-
# Titlecase once
385-
386-
def titlecase_once("", _mode), do: {"", ""}
387-
388-
# Turkic i -> İ
389-
def titlecase_once(<<@letter_i, rest::binary>>, mode) do
390-
char = if mode == :turkic, do: @letter_I_dot_above, else: @letter_I
391-
{char, rest}
392-
end
393-
394-
conditional_titlecase = [@letter_i]
395-
396-
{singles, tables} =
397-
compute_lookup.(
398-
for {codepoint, _upper, _lower, title} <- codes,
399-
title && title != codepoint,
400-
codepoint not in conditional_titlecase,
401-
do: {codepoint, title}
402-
)
403-
404-
for {codepoint, title} <- singles do
405-
def titlecase_once(<<unquote(codepoint), rest::bits>>, _mode) do
406-
{unquote(title), rest}
407-
end
408-
end
409-
410-
for {prefix, clauses} <- tables do
411-
def titlecase_once(<<unquote(prefix), byte, rest::bits>>, _mode) do
412-
value = case byte, do: unquote(clauses)
413-
{value, rest}
414-
end
415-
end
416-
417-
def titlecase_once(<<char::utf8, rest::binary>>, _mode) do
418-
if char >= ?a and char <= ?z do
419-
{<<char - 32::utf8>>, rest}
420-
else
421-
{<<char::utf8>>, rest}
422-
end
423-
end
424-
425-
def titlecase_once(<<char, rest::binary>>, _mode) do
426-
{<<char>>, rest}
427-
end
428383
end
429384

430385
defmodule String.Break do

0 commit comments

Comments
 (0)