Project

General

Profile

AbsoLUAtion » content-negotiation.lua

Content Negotiation: e.g. Accept-Encoding, Accept-Language - gstrauss, 2020-09-20 08:22

 
1
-- content-negotiation.lua
2
--
3
-- Summary: perform custom content negotiation via lighttpd mod_magnet
4
--
5
-- Notes: various filesystem naming conventions might be used to place
6
-- lang and/or encoding extensions prior to original file extension,
7
-- or afterwards.  This implementation places lang before and encoding after,
8
-- demonstrating how to implement either.
9
--
10
--
11
-- Copyright (c) 2017, Glenn Strauss (gstrauss () gluelogic.com), incremental
12
-- All rights reserved.
13
--
14
-- License: 3-clause BSD
15
--
16
-- Redistribution and use in source and binary forms, with or without
17
-- modification, are permitted provided that the following conditions are met:
18
-- 
19
-- - Redistributions of source code must retain the above copyright notice, this
20
--   list of conditions and the following disclaimer.
21
-- 
22
-- - Redistributions in binary form must reproduce the above copyright notice,
23
--   this list of conditions and the following disclaimer in the documentation
24
--   and/or other materials provided with the distribution.
25
-- 
26
-- - Neither the name of the 'incremental' nor the names of its contributors may
27
--   be used to endorse or promote products derived from this software without
28
--   specific prior written permission.
29
-- 
30
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
31
-- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32
-- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33
-- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
34
-- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35
-- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36
-- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
37
-- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
38
-- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39
-- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
40
-- THE POSSIBILITY OF SUCH DAMAGE.
41

    
42

    
43
function parse_HTTP_list (header)
    -- Parse a comma-separated HTTP list header value (e.g. Accept-Language,
    -- Accept-Encoding) into an array of item names ordered by preference.
    --
    -- header: raw request header string, or nil when the header is absent
    -- returns: array (possibly empty) of item strings, highest qvalue first;
    --          items sharing a qvalue keep the order in which they appeared
    --
    -- There are *many* ways to parse strings in Lua, some more efficient than
    -- others, but none is a one-size-fits-all solution.  This is but one
    -- approach and has not been optimized.
    --
    -- Simplifications (not fully RFC-compliant, but reasonable for headers
    -- with well-known sets of values):
    -- - quoted-strings containing ',' or ';' are *not* supported, so all
    --   whitespace is stripped before splitting
    -- - ';' is assumed to be followed by "q=..." and nothing else; any other
    --   parameter is dropped and the item is given the default qvalue of 1
    -- - items with q=0 (or an unparsable qvalue) are ignored, since the
    --   default content file is served when there is no better match

    local tokens = {}
    if (not header) then return tokens end
    header = string.gsub(header, "%s+", "")

    for token in string.gmatch(header, "[^,]+") do
        local name = token
        local q = 1
        -- the parameter name is matched case-insensitively ("q=" or "Q=")
        local b, e = string.find(token, ";[qQ]=")
        if b then
            q = tonumber(token:sub(e+1))
            if (q ~= nil and q > 0) then
                name = token:sub(1, b-1)
            else
                name = ""   -- q=0 or invalid qvalue: skip item
            end
        else
            b = string.find(token, ";", 1, true)
            if b then name = token:sub(1, b-1) end
        end
        if (name ~= "") then
            -- third field records arrival order; used as sort tie-breaker
            local idx = #tokens + 1
            tokens[idx] = { q, name, idx }
        end
    end

    -- table.sort is not stable, so break qvalue ties on arrival order to
    -- keep the client's listed order among equally-weighted items
    table.sort(tokens, function (v1, v2)
        if (v1[1] ~= v2[1]) then return v1[1] > v2[1] end
        return v1[3] < v2[3]
    end)

    -- return simple ordered, indexed list of names
    local list = {}
    for i = 1, #tokens do
        list[i] = tokens[i][2]
    end
    return list
end
90

    
91

    
92
function negotiate_language (path)
    -- Look for a language-specific variant of path, based on the request's
    -- Accept-Language header, and if one exists point the request at it by
    -- updating lighty.env["physical.path"] and lighty.env["physical.rel-path"].
    --
    -- The language tag is placed before the file extension
    -- (e.g. index.html -> index.fr.html), or appended after a '.' when path
    -- has no extension (e.g. README -> README.fr).
    --
    -- path: physical path of the default content file
    -- returns: always 0

    local langs = parse_HTTP_list(lighty.request["Accept-Language"])
    if not langs[1] then return 0 end

    -- split path into basepath and ext
    -- basepath ends in '.' for use below, and if present, ext begins with '.'
    local ext = string.match(path, "(%.[^/]+)$")
    local has_ext = (ext ~= nil)
    local basepath
    if has_ext then
        basepath = path:sub(1, -#ext)
    else
        basepath = path .. "."
        ext = ""
    end

    -- check if basepath .. lang .. ext exists, in order of preference
    for _, lang in ipairs(langs) do
        local attr = nil
        local candidate = nil
        if (string.find(lang, "/", 1, true)) then
            -- skip lang containing '/'
            -- security: avoid path traversal
            -- since lang is used in filenames, lang must not contain '/'
        elseif (lang == "en-US" or lang == "en") then
            -- (optional optimization; remove condition if should not apply)
            -- skip if default file is for "en-US" and "en"
            -- (assumes Accept-Language contains only en-US and/or en,
            --  or they are last, i.e. other languages are preferred)
        else
            candidate = basepath .. lang .. ext
            attr = lighty.stat(candidate)
        end
        if (attr and attr["is_file"]) then
            lighty.env["physical.path"] = candidate
            local rel = lighty.env["physical.rel-path"]
            -- Test for an extension explicitly: #ext is a number, and every
            -- number (including 0) is truthy in Lua, so "(#ext) and a or b"
            -- would never select the no-extension form.
            if has_ext then
                lighty.env["physical.rel-path"] =
                  rel:sub(1, -#ext) .. lang .. ext
            else
                lighty.env["physical.rel-path"] = rel .. "." .. lang
            end
            return 0
        end
    end

    return 0
end
139

    
140

    
141
-- Maps a lowercase Accept-Encoding token to the filename suffix of the
-- corresponding pre-encoded file.
local encoding_exts = {
    ["br"]     = ".br",    -- brotli
    ["gzip"]   = ".gz",    -- gzip
    ["x-gzip"] = ".gz",
    -- ["bzip2"] = ".bz2", ["x-bzip2"] = ".bz2",    -- bzip2 (not enabled)
}

function negotiate_encoding (path, content_type)
    -- Serve a pre-encoded sibling of path (path plus a suffix from
    -- encoding_exts) when the request's Accept-Encoding lists a matching
    -- encoding and that file exists.  On a match, physical.path and
    -- physical.rel-path are redirected to the encoded file, and the
    -- Content-Encoding and Content-Type response headers are set.
    --
    -- path:         physical path of the unencoded content file
    -- content_type: Content-Type to send with the encoded file; when not
    --               given, "application/octet-stream" is sent
    -- returns: always 0

    local accepted = parse_HTTP_list(lighty.request["Accept-Encoding"])
    if not accepted[1] then return 0 end

    -- walk the encodings in order of client preference; first hit wins
    for i = 1, #accepted do
        local coding = accepted[i]
        local suffix = encoding_exts[string.lower(coding)]
        if suffix then
            local encoded_path = path .. suffix
            local st = lighty.stat(encoded_path)
            if (st and st["is_file"]) then
                lighty.env["physical.path"] = encoded_path
                lighty.env["physical.rel-path"] =
                  lighty.env["physical.rel-path"] .. suffix
                -- echo the token as the client sent it
                lighty.header["Content-Encoding"] = coding
                lighty.header["Content-Type"] =
                  content_type or "application/octet-stream"
                return 0
            end
        end
    end

    return 0
end
179

    
180

    
181
--
182
-- content negotiation
183
--
184

    
185
-- check that default content file exists (or index-file)
186
-- Resolve the default content file first; negotiation is attempted only
-- when it exists.  A directory is resolved to its index.html (the only
-- index file name checked here).
local attr = lighty.stat(lighty.env["physical.path"])
if (not attr) then return 0 end

if (not attr["is_file"]) then
    -- neither a regular file nor a directory: nothing to negotiate
    if (not attr["is_dir"]) then return 0 end

    local dir = lighty.env["physical.path"]
    -- avoid doubling the '/' when the directory path already ends in one
    local indexfile = "/index.html"
    if (string.sub(dir, -1) == "/") then indexfile = "index.html" end

    local indexpath = dir .. indexfile
    attr = lighty.stat(indexpath)
    if (not attr or not attr["is_file"]) then return 0 end

    -- (below assignments not required; merely shortcut mod_indexfile)
    lighty.env["physical.path"] = indexpath
    lighty.env["physical.rel-path"] =
      lighty.env["physical.rel-path"] .. indexfile
end

-- language first, then encoding; each step re-reads physical.path, so the
-- encoding step sees whatever file the language step selected
negotiate_language(lighty.env["physical.path"])

negotiate_encoding(lighty.env["physical.path"], attr["content-type"])

return 0
(1-1/3)