1
|
-- content-negotiation.lua
|
2
|
--
|
3
|
-- Summary: perform custom content negotiation via lighttpd mod_magnet
|
4
|
--
|
5
|
-- Notes: various filesystem naming conventions might be used to place
|
6
|
-- lang and/or encoding extensions prior to original file extension,
|
7
|
-- or afterwards. This implementation places lang before and encoding after,
|
8
|
-- demonstrating how to implement either.
|
9
|
--
|
10
|
--
|
11
|
-- Copyright (c) 2017, Glenn Strauss (gstrauss () gluelogic.com), incremental
|
12
|
-- All rights reserved.
|
13
|
--
|
14
|
-- License: 3-clause BSD
|
15
|
--
|
16
|
-- Redistribution and use in source and binary forms, with or without
|
17
|
-- modification, are permitted provided that the following conditions are met:
|
18
|
--
|
19
|
-- - Redistributions of source code must retain the above copyright notice, this
|
20
|
-- list of conditions and the following disclaimer.
|
21
|
--
|
22
|
-- - Redistributions in binary form must reproduce the above copyright notice,
|
23
|
-- this list of conditions and the following disclaimer in the documentation
|
24
|
-- and/or other materials provided with the distribution.
|
25
|
--
|
26
|
-- - Neither the name of the 'incremental' nor the names of its contributors may
|
27
|
-- be used to endorse or promote products derived from this software without
|
28
|
-- specific prior written permission.
|
29
|
--
|
30
|
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
31
|
-- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
32
|
-- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
33
|
-- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
34
|
-- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
35
|
-- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
36
|
-- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
37
|
-- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
38
|
-- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
39
|
-- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
40
|
-- THE POSSIBILITY OF SUCH DAMAGE.
|
41
|
|
42
|
|
43
|
--- Parse a comma-separated HTTP list header into tokens ordered by preference.
--
-- Intended for headers such as Accept-Language and Accept-Encoding, which
-- generally have well-known sets of values.  This is one simple, unoptimized
-- way to do it and is deliberately not fully RFC-compliant:
--   * quoted-strings containing ',' or ';' are NOT supported, so all
--     whitespace is simply stripped before splitting;
--   * each item is split on ',' and then inspected for a ";q=" parameter,
--     which is assumed to be the last thing in the item; an item whose
--     qvalue does not parse as a number is dropped;
--   * items with q=0 (or a negative qvalue) are dropped, since the default
--     content file is served anyway when there is no better match;
--   * any other ";param" suffix (without a qvalue) is discarded and the item
--     is treated as q=1.
--
-- @param header  raw header value (string), or nil if the header is absent
-- @return array of token strings, highest qvalue first; items with the same
--         qvalue keep the order in which they appeared in the header
function parse_HTTP_list (header)
  local tokens = {}
  if (not header) then return tokens end

  header = string.gsub(header, "%s+", "")
  for token in string.gmatch(header, "[^,]+") do
    -- the "q" parameter name is case-insensitive: search a lowercased copy
    -- (same length, so the indices apply to the original token as well)
    local b, e = string.find(string.lower(token), ";q=", 1, true)
    local n = 1
    if b then
      n = tonumber(token:sub(e+1))
      token = (n ~= nil and n > 0) and token:sub(1, b-1) or ""
    else
      b = string.find(token, ";", 1, true)
      if b then token = token:sub(1, b-1) end
    end
    -- ignore (skip) if invalid (e.g. empty token or invalid qvalue)
    if (token ~= "") then
      local idx = #tokens + 1
      tokens[idx] = { n, token, idx }
    end
  end

  -- table.sort is not stable, so break qvalue ties on the original position
  table.sort(tokens, function (v1, v2)
    if v1[1] ~= v2[1] then return v1[1] > v2[1] end
    return v1[3] < v2[3]
  end)

  -- return simple ordered, indexed list of the token strings
  local list = {}
  for i, v in ipairs(tokens) do
    list[i] = v[2]
  end
  return list
end
|
90
|
|
91
|
|
92
|
--- Select a language-specific variant of the content file, if one exists.
--
-- Languages from the request's Accept-Language header are tried in order of
-- preference.  The language tag is placed before the file extension
-- ("index.html" -> "index.fr.html"); when the final path component has no
-- extension it is appended after a '.' ("README" -> "README.fr").  On the
-- first candidate that lighty.stat() reports as a regular file,
-- lighty.env["physical.path"] and lighty.env["physical.rel-path"] are
-- rewritten to refer to that candidate.
--
-- @param path  physical path of the default content file
-- @return always 0
function negotiate_language (path)
  local langs = parse_HTTP_list(lighty.request["Accept-Language"])
  if not langs[1] then return 0 end

  -- split path into basepath and ext
  -- basepath ends in '.' for use below, and if present, ext begins with '.'
  -- (ext starts at the first '.' of the final path component, e.g. ".tar.gz")
  local ext = string.match(path, "(%.[^/]+)$")
  local basepath
  if (ext) then
    -- -#ext indexes the leading '.' of ext, so basepath keeps that '.'
    basepath = path:sub(1, -#ext)
  else
    basepath = path .. "."
    ext = ""
  end

  -- check if basepath .. lang .. ext exists
  for _, lang in ipairs(langs) do
    local candidate
    local attr = nil
    if (string.find(lang, "/", 1, true)) then
      -- skip lang containing '/'
      -- security: avoid path traversal
      -- since lang is used in filenames, lang must not contain '/'
    elseif (lang == "en-US" or lang == "en") then
      -- (optional optimization; remove condition if should not apply)
      -- skip if default file is for "en-US" and "en"
      -- (assumes Accept-Language contains only en-US and/or en,
      --  or they are last, i.e. other languages are preferred)
    else
      candidate = basepath .. lang .. ext
      attr = lighty.stat(candidate)
    end

    if (attr and attr["is_file"]) then
      local rel = lighty.env["physical.rel-path"]
      lighty.env["physical.path"] = candidate
      -- note: 0 is truthy in Lua, so the length must be compared explicitly
      if (#ext > 0) then
        lighty.env["physical.rel-path"] = rel:sub(1, -#ext) .. lang .. ext
      else
        lighty.env["physical.rel-path"] = rel .. "." .. lang
      end
      return 0
    end
  end

  return 0
end
|
139
|
|
140
|
|
141
|
-- Map of lowercase content-coding names (as they may appear in
-- Accept-Encoding) to the filename extension of the pre-encoded file.
local encoding_exts =
{
   ["br"]   = ".br"                            -- brotli
  ,["gzip"] = ".gz", ["x-gzip"] = ".gz"        -- gzip
  -- ,["bzip2"] = ".bz2", ["x-bzip2"] = ".bz2" -- bzip2
}

--- Select a pre-encoded (compressed) variant of the content file, if any.
--
-- Encodings from the request's Accept-Encoding header are tried in order of
-- preference; those without an entry in encoding_exts are ignored.  The
-- mapped extension is appended to `path`, and on the first candidate that
-- lighty.stat() reports as a regular file:
--   * lighty.env["physical.path"] is set to the candidate and the extension
--     is appended to lighty.env["physical.rel-path"];
--   * the Content-Encoding response header is set to the encoding token as
--     sent by the client;
--   * the Content-Type response header is set to `content_type`, or to
--     "application/octet-stream" when `content_type` is not given.
--
-- @param path          physical path of the (unencoded) content file
-- @param content_type  Content-Type of the unencoded file, or nil
-- @return always 0
function negotiate_encoding (path, content_type)
  local encs = parse_HTTP_list(lighty.request["Accept-Encoding"])
  if not encs[1] then return 0 end

  -- check if pre-encoded file exists with mapped extension
  for _, enc in ipairs(encs) do
    -- table keys are lowercase; the client may send any case
    local ext = encoding_exts[string.lower(enc)]
    if (ext) then
      local encoded_path = path .. ext
      local attr = lighty.stat(encoded_path)
      if (attr and attr["is_file"]) then
        lighty.env["physical.path"] = encoded_path
        lighty.env["physical.rel-path"] =
          lighty.env["physical.rel-path"] .. ext
        lighty.header["Content-Encoding"] = enc
        lighty.header["Content-Type"] =
          content_type or "application/octet-stream"
        return 0
      end
    end
  end

  return 0
end
|
179
|
|
180
|
|
181
|
--
|
182
|
-- content negotiation
|
183
|
--
|
184
|
|
185
|
-- Script entry point: resolve the default content file, then let language
-- negotiation and encoding negotiation (in that order) rewrite
-- lighty.env["physical.path"] to a better-matching variant.  Always
-- returns 0, whether or not a variant was selected.

-- check that default content file exists (or index-file)
local attr = lighty.stat(lighty.env["physical.path"])
if (not attr) then return 0 end
if (not attr["is_file"]) then
  -- neither a regular file nor a directory: nothing to negotiate
  if (not attr["is_dir"]) then return 0 end

  -- check for index file (code below checks only for index.html)
  local path = lighty.env["physical.path"]
  local indexfile =
    string.sub(path, -1) == "/" and "index.html" or "/index.html"
  path = path .. indexfile
  attr = lighty.stat(path)
  if (not attr or not attr["is_file"]) then return 0 end

  -- (below assignments not required; merely shortcut mod_indexfile)
  lighty.env["physical.path"] = path
  lighty.env["physical.rel-path"] =
    lighty.env["physical.rel-path"] .. indexfile
end

-- language first: it may change physical.path, which encoding then extends
negotiate_language(lighty.env["physical.path"])

-- content-type is taken from the stat result of the default (or index) file
negotiate_encoding(lighty.env["physical.path"], attr["content-type"])

return 0
|