forked from comtihon/bson-erlang
-
Notifications
You must be signed in to change notification settings - Fork 6
/
bson.erl
232 lines (181 loc) · 9.46 KB
/
bson.erl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
%@doc A BSON document is a JSON-like object with a standard binary encoding defined at bsonspec.org. This implements version 1.0 of that spec.
-module (bson).
-export_type ([maybe/1]).
-export_type ([document/0, label/0, value/0]).
-export_type ([bin/0, bfunction/0, uuid/0, md5/0, userdefined/0]).
-export_type ([mongostamp/0, minmaxkey/0]).
-export_type ([regex/0, unixtime/0]).
-export_type ([javascript/0]).
-export_type ([objectid/0, unixsecs/0]).
-export ([lookup/2, lookup/3, at/2, include/2, exclude/2, update/3, merge/2, append/2]).
-export ([doc_foldl/3, doc_foldr/3, fields/1, document/1]).
-export ([utf8/1, str/1]).
-export ([timenow/0, ms_precision/1, secs_to_unixtime/1, unixtime_to_secs/1]).
-export ([objectid/3, objectid_time/1]).
-type maybe(A) :: {A} | {}.
% Document %
-type document() :: tuple(). % {label(), value(), label(), value(), ...}.
% Conceptually a document is a list of label-value pairs (associative array, dictionary, record). However, for read/write-ability, it is implemented as a flat tuple, ie. the list becomes a tuple and the pair braces are elided, so you just have alternating labels and values where each value is associated with the previous label.
% To distinguish a tagged value such as {uuid, _} (see value() type below) from a document with field name 'uuid' we made sure all valid tagged value types have an odd number of elements (documents have even number of elements). So actually only {bin, uuid, _} is a valid value, {uuid, _} is a document.
-type label() :: atom().
-spec doc_foldl (fun ((label(), value(), A) -> A), A, document()) -> A.
%@doc Reduce document by applying given function to each field with result of previous field's application, starting with given initial result.
doc_foldl (Fun, Acc, Doc) -> doc_foldlN (Fun, Acc, Doc, 0, tuple_size (Doc) div 2).
-spec doc_foldlN (fun ((label(), value(), A) -> A), A, document(), integer(), integer()) -> A.
%@doc Fold over fields from first index (inclusive) to second index (exclusive), zero-based index.
doc_foldlN (_, Acc, _, High, High) -> Acc;
doc_foldlN (Fun, Acc, Doc, Low, High) ->
Acc1 = Fun (element (Low * 2 + 1, Doc), element (Low * 2 + 2, Doc), Acc),
doc_foldlN (Fun, Acc1, Doc, Low + 1, High).
-spec doc_foldr (fun ((label(), value(), A) -> A), A, document()) -> A.
%@doc Same as doc_foldl/3 except apply fields in reverse order
doc_foldr (Fun, Acc, Doc) -> doc_foldrN (Fun, Acc, Doc, 0, tuple_size (Doc) div 2).
-spec doc_foldrN (fun ((label(), value(), A) -> A), A, document(), integer(), integer()) -> A.
%@doc Fold over fields from second index (exclusive) to first index (inclusive), zero-based index.
doc_foldrN (_, Acc, _, Low, Low) -> Acc;
doc_foldrN (Fun, Acc, Doc, Low, High) ->
Acc1 = Fun (element (High * 2 - 1, Doc), element (High * 2, Doc), Acc),
doc_foldrN (Fun, Acc1, Doc, Low, High - 1).
-spec fields (document()) -> [{label(), value()}].
%@doc Convert document to a list of all its fields
fields (Doc) -> doc_foldr (fun (Label, Value, List) -> [{Label, Value} | List] end, [], Doc).
-spec document ([{label(), value()}]) -> document().
%@doc Convert list of fields to a document
document (Fields) -> list_to_tuple (flatten (Fields)).
-spec flatten ([{label(), value()}]) -> [label() | value()].
%@doc Flatten list by removing tuple constructors
flatten ([]) -> [];
flatten ([{Label, Value} | Fields]) -> [Label, Value | flatten (Fields)].
-spec lookup (label(), document()) -> maybe (value()).
%@doc Value of field in document if there
lookup (Label, Doc) -> case find (Label, Doc) of
{Index} -> {element (Index * 2 + 2, Doc)};
{} -> {} end.
-spec lookup (label(), document(), value()) -> value().
%@doc Value of field in document if there or default
lookup (Label, Doc, Default) -> case find (Label, Doc) of
{Index} -> element (Index * 2 + 2, Doc);
{} -> Default end.
-spec find (label(), document()) -> maybe (integer()).
%@doc Index of field in document if there
find (Label, Doc) -> findN (Label, Doc, 0, tuple_size (Doc) div 2).
-spec findN (label(), document(), integer(), integer()) -> maybe (integer()).
%@doc Find field index in document from first index (inclusive) to second index (exclusive).
findN (_Label, _Doc, High, High) -> {};
findN (Label, Doc, Low, High) -> case element (Low * 2 + 1, Doc) of
Label -> {Low};
_ -> findN (Label, Doc, Low + 1, High) end.
-spec at (label(), document()) -> value().
%@doc Value of field in document, error if missing
at (Label, Document) -> case lookup (Label, Document) of
{} -> erlang:error (missing_field, [Label, Document]);
{Value} -> Value end.
-spec include ([label()], document()) -> document().
%@doc Project given fields of document
include (Labels, Document) ->
Fun = fun (Label, Doc) -> case lookup (Label, Document) of
{Value} -> [Label, Value | Doc];
{} -> Doc end end,
list_to_tuple (lists:foldr (Fun, [], Labels)).
-spec exclude ([label()], document()) -> document().
%@doc Remove given fields from document
exclude (Labels, Document) ->
Fun = fun (Label, Value, Doc) -> case lists:member (Label, Labels) of
false -> [Label, Value | Doc];
true -> Doc end end,
list_to_tuple (doc_foldr (Fun, [], Document)).
-spec update (label(), value(), document()) -> document().
%@doc Replace field with new value, adding to end if new
update (Label, Value, Document) -> case find (Label, Document) of
{Index} -> setelement (Index * 2 + 2, Document, Value);
{} ->
Doc = erlang:append_element (Document, Label),
erlang:append_element (Doc, Value) end.
-spec merge (document(), document()) -> document().
%@doc First doc overrides second with new fields added at end of second doc
merge (UpDoc, BaseDoc) ->
Fun = fun (Label, Value, Doc) -> update (Label, Value, Doc) end,
doc_foldl (Fun, BaseDoc, UpDoc).
-spec append (document(), document()) -> document().
%@doc Append two documents together
append (Doc1, Doc2) -> list_to_tuple (tuple_to_list (Doc1) ++ tuple_to_list (Doc2)).
% Value %
-type value() ::
float() |
utf8() |
document() |
arr() |
bin() |
bfunction() |
uuid() |
md5() |
userdefined() |
objectid() |
boolean() |
unixtime() |
null |
regex() |
javascript() |
atom() |
integer() |
mongostamp() |
minmaxkey().
% Note, No value() can be a tuple with even number of elements because then it would be ambiguous with document(). Therefore all tagged values defined below have odd number of elements.
% Array %
-type arr() :: [value()].
% Caution, a string() will be interpreted as an array of integers. You must supply strings as utf8 binary, see below.
% String %
-type utf8() :: unicode:unicode_binary().
% binary() representing a string of characters encoded with UTF-8.
% An Erlang string() is a list of unicode characters (codepoints), but this list must be converted to utf-8 binary for use in Bson. Call utf8/1 to do this, or encode pure ascii literals directly as `<<"abc">>' and non-pure ascii literals as `<<"aßc"/utf8>>'.
-spec utf8 (unicode:chardata()) -> utf8().
%@doc Convert string to utf8 binary. string() is a subtype of unicode:chardata().
utf8 (CharData) -> case unicode:characters_to_binary (CharData) of
{error, _Bin, _Rest} -> erlang:error (unicode_error, [CharData]);
{incomplete, _Bin, _Rest} -> erlang:error (unicode_incomplete, [CharData]);
Bin -> Bin end.
-spec str (unicode:chardata()) -> string().
%@doc Convert utf8 binary to string. utf8() is a subtype of unicode:chardata().
str (CharData) -> case unicode:characters_to_list (CharData) of
{error, _Bin, _Rest} -> erlang:error (unicode_error, [CharData]);
{incomplete, _Bin, _Rest} -> erlang:error (unicode_incomplete, [CharData]);
Str -> Str end.
% Binary %
-type bin() :: {bin, bin, binary()}.
-type bfunction() :: {bin, function, binary()}.
-type uuid() :: {bin, uuid, binary()}.
-type md5() :: {bin, md5, binary()}.
-type userdefined() :: {bin, userdefined, binary()}.
% Special %
-type mongostamp() :: {mongostamp, integer(), integer()}.
% 4-byte increment, 4-byte timestamp. 0 timestamp has special semantics
-type minmaxkey() :: 'MIN_KEY' | 'MAX_KEY'.
% Special values that compare lower/higher than all other bson values
% Regex %
-type regex() :: {regex, utf8(), utf8()}. % pattern and options
% Datetime %
-type unixtime() :: {integer(), integer(), integer()}. % {MegaSecs, Secs, MicroSecs}
% Unix time in Erlang now/os:timstamp format, but only to millisecond precision when serialized.
-spec timenow () -> unixtime(). % IO
% Current unixtime to millisecond precision, ie. MicroSecs is always a multiple of 1000.
timenow() -> ms_precision (os:timestamp()).
-spec ms_precision (unixtime()) -> unixtime().
%@doc Truncate microsecs to millisecs since bson drops microsecs anyway, so time will be equal before and after serialization.
ms_precision ({MegaSecs, Secs, MicroSecs}) ->
{MegaSecs, Secs, MicroSecs div 1000 * 1000}.
-type unixsecs() :: integer(). % Unix Time in seconds
-spec secs_to_unixtime (unixsecs()) -> unixtime().
secs_to_unixtime (UnixSecs) -> {UnixSecs div 1000000, UnixSecs rem 1000000, 0}.
-spec unixtime_to_secs (unixtime()) -> unixsecs().
unixtime_to_secs ({MegaSecs, Secs, _}) -> MegaSecs * 1000000 + Secs.
% Javascript %
-type javascript() :: {javascript, document(), utf8()}. % scope and code
% ObjectId %
-type objectid() :: {<<_:96>>}.
% `<<UnixTimeSecs:32/big, MachineId:24/big, ProcessId:16/big, Count:24/big>>'
-spec objectid (unixsecs(), <<_:40>>, integer()) -> objectid().
objectid (UnixSecs, MachineAndProcId, Count) ->
{<<UnixSecs :32/big, MachineAndProcId :5/binary, Count :24/big>>}.
-spec objectid_time (objectid()) -> unixtime().
%@doc Time when object id was generated
objectid_time ({<<UnixSecs:32/big, _:64>>}) -> secs_to_unixtime (UnixSecs).