expressjs/body-parser 源码分析

文章目录

由于需要在 expressjs 中解析 xml ，而 expressjs 默认无法解析 xml ，所以我打算自己造一个工业级轮子，于是参考了 expressjs 中关于 json 解析方面的代码。以下是对这部分代码的分析。

朴素实现是监听 req 的 data 事件，将读到的数据存储在字符串变量中再进行解析。body-parser 的实现与此有不少区别。首先，代码中有很多校验信息，要根据请求头中的内容对请求体做类型、长度、字符编码等校验，以此提高安全性；其次，代码都是通过调用 raw-body 模块中的 getRawBody(stream, options, callback) 来解析请求体的，而不是直接监听 data 事件进行操作。getRawBody 函数会做请求体校验、异常处理、管道卸载等工作，比朴素的做法安全很多。 getRawBody 函数也可以完成解码功能。

body-parser 的文档和代码见： https://github.com/expressjs/body-parser 。中文注释为我的注析。这里只解析 index.js 及其调用的内容， lib/types/ 下的其他文件结构类似，忽略不析。

index.js

/*!
 * body-parser
 * Copyright(c) 2014 Douglas Christopher Wilson
 * MIT Licensed
 */

/**
 * Module dependencies.
 */

// deprecate 是一个用来显示“不建议”消息的工具，可以用来警告用户不要使用
// 那些不建议使用的函数或模块。详见：
// https://github.com/dougwilson/nodejs-depd
var deprecate = require('depd')('body-parser')
var fs = require('fs') // 载入文件处理模块，nodejs 核心模块。
// 载入路径处理模块，nodejs 核心模块，参考：
// http://nodejs.org/api/path.html
var path = require('path')

/**
 * Module exports.
 */

exports = module.exports = deprecate.function(bodyParser,
  'bodyParser: use individual json/urlencoded middlewares')

/**
 * Path to the parser modules.
 */

var parsersDir = path.join(__dirname, 'lib', 'types')

/**
 * Auto-load bundled parsers with getters.
 *
 * Each `.js` file found in `parsersDir` becomes a property of `exports`
 * named after the file without its extension. The property is a getter, so
 * the parser module is only required the first time it is read.
 */

fs.readdirSync(parsersDir).forEach(function onfilename(filename) {
  // Only JavaScript files are treated as parsers.
  var isScript = /\.js$/.test(filename)
  if (!isScript) return

  var parserName = path.basename(filename, '.js')
  // Resolve to an absolute location so the getter can require the module
  // no matter where it is invoked from.
  var parserPath = path.resolve(parsersDir, filename)
  var cached

  // See the MDN documentation of Object.defineProperty for the meaning of
  // the descriptor fields used here.
  Object.defineProperty(exports, parserName, {
    configurable: true, // the property may be redefined or deleted later
    enumerable: true, // the property shows up when enumerating exports
    get: function load() {
      // Require on first access, then keep returning the same module.
      if (!cached) {
        cached = require(parserPath)
      }

      return cached
    }
  })
})

/**
 * Create a middleware to parse json and urlencoded bodies.
 *
 * Every option except `type` is forwarded, in one shared object, to both
 * `exports.urlencoded` and `exports.json`.
 *
 * @param {object} [options]
 * @return {function} middleware that runs the JSON parser first and, unless
 *   it reports an error, hands the request to the urlencoded parser.
 * @deprecated
 * @api public
 */

function bodyParser(options){
  var source = options || {}
  var forwarded = {}
  var key

  // exclude type option
  for (key in source) {
    if (key === 'type') continue
    forwarded[key] = source[key]
  }

  var parseUrlencoded = exports.urlencoded(forwarded)
  var parseJson = exports.json(forwarded)

  return function bodyParser(req, res, next) {
    // Continuation invoked once the JSON parser is done with the request.
    function afterJson(err) {
      if (err) {
        return next(err)
      }

      parseUrlencoded(req, res, next)
    }

    parseJson(req, res, afterJson)
  }
}

lib/types/json.js

/*!
 * body-parser
 * Copyright(c) 2014 Jonathan Ong
 * Copyright(c) 2014 Douglas Christopher Wilson
 * MIT Licensed
 */

/**
 * Module dependencies.
 */

// bytes 模块可以在字节度量的数字表示和字符串表示之
// 间互相转换。例如：
// bytes('1kb') 结果为 1024 ；
// bytes('2mb') 结果为 2097152 ；
// bytes('1gb') 结果为 1073741824 ；
// bytes(1073741824) 结果为 1gb ；
// bytes(1099511627776) 结果为 1tb 。
// 详见 github 上的 visionmedia/bytes.js 项目：
// https://github.com/visionmedia/bytes.js
var bytes = require('bytes')
var read = require('../read')
// expressjs 自带的媒体解析模块，详见：
// https://github.com/expressjs/media-typer
var typer = require('media-typer') 
// type-is 是 expressjs 自带的类型判断模块，详见 github 上的 expressjs/
// type-is 项目： https://github.com/expressjs/type-is
var typeis = require('type-is')

/**
 * Module exports.
 */

module.exports = json

/**
 * RegExp to match the first non-space in a string.
 */

var firstcharRegExp = /^\s*(.)/

/**
 * Create a middleware to parse JSON bodies.
 *
 * Recognised options:
 *  - limit:   body size limit; a number is used as is, anything else is
 *             converted with `bytes()` (default '100kb')
 *  - inflate: enabled unless explicitly `false`
 *  - reviver: passed as the second argument of `JSON.parse`
 *  - strict:  unless explicitly `false`, the first character of the body
 *             must be `{` or `[`
 *  - type:    value handed to `typeis` to decide whether the request is
 *             handled (default 'json')
 *  - verify:  function forwarded to `read`
 *
 * @param {object} [options]
 * @return {function}
 * @throws {TypeError} when `verify` is given but is not a function
 * @api public
 */

function json(options) {
  var settings = options || {}

  var limit = typeof settings.limit === 'number'
    ? settings.limit
    : bytes(settings.limit || '100kb')
  var inflate = settings.inflate !== false
  var reviver = settings.reviver
  var strict = settings.strict !== false
  var type = settings.type || 'json'
  var verify = settings.verify || false

  // The verification hook, when supplied, has to be callable.
  if (verify !== false && typeof verify !== 'function') {
    throw new TypeError('option verify must be function')
  }

  // Turn the decoded body text into a value; errors are thrown to the caller.
  function parse(body) {
    if (body.length === 0) {
      throw new Error('invalid json, empty body')
    }

    if (strict) {
      var lead = firstchar(body)
      var looksStructured = lead === '{' || lead === '['

      if (!looksStructured) {
        throw new Error('invalid json')
      }
    }

    return JSON.parse(body, reviver)
  }

  // The returned function is the middleware itself.
  return function jsonParser(req, res, next) {
    // req._body is the marker that a body has already been handled.
    if (req._body) return next()
    req.body = req.body || {}

    // Skip requests whose content type does not match `type`.
    if (!typeis(req, type)) return next()

    // RFC 7159 sec 8.1
    var charset = typer.parse(req).parameters.charset || 'utf-8'
    var isUtf = charset.substr(0, 4).toLowerCase() === 'utf-'

    if (!isUtf) {
      var err = new Error('unsupported charset')
      err.status = 415
      next(err)
      return
    }

    // read
    read(req, res, next, parse, {
      encoding: charset,
      inflate: inflate,
      limit: limit,
      verify: verify
    })
  }
}

/**
 * Get the character captured by `firstcharRegExp`, i.e. the first character
 * that follows any leading whitespace.
 *
 * Note: for input made only of spaces the pattern can backtrack and capture
 * one of those spaces; input with no capturable character yields ''.
 *
 * @param {string} str
 * @return {string} the captured character, or '' for empty/falsy input or
 *   when the pattern does not match
 * @api private
 */


function firstchar(str) {
  if (!str) return ''

  // firstcharRegExp is the module-level pattern declared near the top of
  // this file.
  var found = firstcharRegExp.exec(str)

  if (found === null) {
    return ''
  }

  return found[1]
}

lib/read.js

/*!
 * body-parser
 * Copyright(c) 2014 Douglas Christopher Wilson
 * MIT Licensed
 */

/**
 * Module dependencies.
 */

// 将流中的所有内容载入为 buffer 或字符串。参考：
// https://github.com/stream-utils/raw-body
var getBody = require('raw-body')

// 互相转换 buffer 与 js 字符串。参考 github：
// https://github.com/ashtuchkin/iconv-lite
var iconv = require('iconv-lite')

// 使得程序可以在退出时执行一个回调函数。参考：
// https://github.com/jshttp/on-finished
var onFinished = require('on-finished')

// expressjs 自带的媒体解析模块，详见 github：
// https://github.com/expressjs/media-typer
var typer = require('media-typer')

// nodejs 核心模块，提供数据压缩和解压功能。参考：
// http://nodejs.org/api/zlib.html
var zlib = require('zlib')

/**
 * Module exports.
 */

module.exports = read

/**
 * Read a request into a buffer and parse.
 *
 * Flags the request as handled (`req._body = true`), obtains the content
 * stream, reads it with `getBody`, optionally runs `options.verify`, decodes
 * the body when needed and stores `parse(body)` in `req.body`. Every failure
 * is reported through `next(err)`; errors without a `status` get 400 (read
 * and parse failures) or 403 (verify failures).
 *
 * The caller's `options` object is not modified; a shallow copy carrying the
 * computed `length` and `encoding` is what gets handed to `getBody`.
 *
 * @param {object} req
 * @param {object} res
 * @param {function} next
 * @param {function} parse
 * @param {object} [options] reads `inflate`, `encoding` and `verify`; all
 *   properties are forwarded to `getBody`
 * @api private
 */

function read(req, res, next, parse, options) {
  var length
  var stream
  var key

  // Default the options before their first use; otherwise a missing argument
  // would throw below and be reported as if the stream could not be created.
  options = options || {}

  // flag as parsed
  req._body = true

  try {
    stream = contentstream(req, options.inflate)
    // contentstream may attach the declared content length to the stream;
    // take it over and remove the temporary property again.
    length = stream.length
    delete stream.length
  } catch (err) {
    return next(err)
  }

  var encoding = options.encoding !== null
    ? options.encoding || 'utf-8'
    : null
  var verify = options.verify

  // Work on a copy so the object owned by the caller stays untouched.
  var bodyOptions = {}
  for (key in options) {
    bodyOptions[key] = options[key]
  }

  bodyOptions.length = length
  // With a verify hook no encoding is passed to getBody, so the hook gets the
  // body undecoded (presumably a Buffer); decoding then happens before parse.
  bodyOptions.encoding = verify
    ? null
    : encoding

  // read body
  getBody(stream, bodyOptions, function (err, body) {
    if (err) {
      if (!err.status) {
        err.status = 400
      }

      // read off entire request
      stream.resume()
      onFinished(req, function onfinished() {
        next(err)
      })
      return
    }

    // verify
    if (verify) {
      try {
        verify(req, res, body, encoding)
      } catch (err) {
        if (!err.status) err.status = 403
        return next(err)
      }
    }

    // parse
    try {
      // Decode to a JavaScript string when the body was read undecoded.
      body = typeof body !== 'string' && encoding !== null
        ? iconv.decode(body, encoding)
        : body
      req.body = parse(body)
    } catch (err) {
      if (!err.status) {
        err.body = body
        err.status = 400
      }
      return next(err)
    }

    next()
  })
}

/**
 * Get the content stream of the request.
 *
 * For `deflate` and `gzip` the request is piped into the matching zlib
 * stream and that stream is returned. For `identity` (also used when no
 * Content-Encoding header is present) the request itself is returned, with
 * the Content-Length header value attached as `length`.
 *
 * @param {object} req
 * @param {boolean} [inflate=true] when exactly `false`, any encoding other
 *   than `identity` is rejected
 * @return {object}
 * @throws {Error} with `status` 415 when the encoding is disallowed or
 *   unknown
 * @api private
 */

function contentstream(req, inflate) {
  // `identity` stands for "no content coding" (RFC 7231 sec 3.1.2.2).
  // Content-coding names are case-insensitive (RFC 7231 sec 3.1.2.1), so the
  // header value is normalised before it is compared.
  var encoding = (req.headers['content-encoding'] || 'identity').toLowerCase()
  var err
  var length = req.headers['content-length'] // see RFC 7230 sec 3.3.2
  var stream

  if (inflate === false && encoding !== 'identity') {
    err = new Error('content encoding unsupported')
    err.status = 415
    throw err
  }

  // See the Node.js zlib documentation for the decompression streams.
  switch (encoding) {
    case 'deflate':
      stream = zlib.createInflate()
      req.pipe(stream)
      break
    case 'gzip':
      stream = zlib.createGunzip()
      req.pipe(stream)
      break
    case 'identity':
      stream = req
      stream.length = length
      break
    default:
      err = new Error('unsupported content encoding')
      err.status = 415
      throw err
  }

  return stream
}

lib/types/urlencoded.js

/*!
 * body-parser
 * Copyright(c) 2014 Jonathan Ong
 * Copyright(c) 2014 Douglas Christopher Wilson
 * MIT Licensed
 */

/**
 * Module dependencies.
 */

var bytes = require('bytes')
var deprecate = require('depd')('body-parser')
var read = require('../read')
var typer = require('media-typer')
var typeis = require('type-is')

/**
 * Module exports.
 */

module.exports = urlencoded

/**
 * Cache of parser modules.
 */

var parsers = Object.create(null)

/**
 * Create a middleware to parse urlencoded bodies.
 *
 * Recognised options:
 *  - extended: unless explicitly `false`, the parser built by
 *              `extendedparser` is used, otherwise the one from
 *              `simpleparser`; leaving it undefined emits a deprecation
 *              notice
 *  - inflate:  enabled unless explicitly `false`
 *  - limit:    a number is used as is, anything else is converted with
 *              `bytes()` (default '100kb')
 *  - type:     value handed to `typeis` (default 'urlencoded')
 *  - verify:   function forwarded to `read`
 *
 * @see https://github.com/expressjs/body-parser#bodyparserurlencodedoptions
 * @param {object} [options]
 * @return {function}
 * @throws {TypeError} when `verify` is given but is not a function
 * @api public
 */

function urlencoded(options){
  var settings = options || {}

  // notice because option default will flip in next major
  if (settings.extended === undefined) {
    deprecate('undefined extended: provide extended option')
  }

  var extended = settings.extended !== false
  var inflate = settings.inflate !== false
  var limit = typeof settings.limit === 'number'
    ? settings.limit
    : bytes(settings.limit || '100kb')
  var type = settings.type || 'urlencoded'
  var verify = settings.verify || false

  if (verify !== false && typeof verify !== 'function') {
    throw new TypeError('option verify must be function')
  }

  // Pick the query-string parser once, when the middleware is created.
  var queryparse
  if (extended) {
    queryparse = extendedparser(settings)
  } else {
    queryparse = simpleparser(settings)
  }

  // An empty body yields an empty object without invoking the parser.
  function parse(body) {
    if (body.length) {
      return queryparse(body)
    }

    return {}
  }

  return function urlencodedParser(req, res, next) {
    if (req._body) return next()
    req.body = req.body || {}

    if (!typeis(req, type)) return next()

    var charset = typer.parse(req).parameters.charset || 'utf-8'
    var isUtf8 = charset.toLowerCase() === 'utf-8'

    if (!isUtf8) {
      var err = new Error('unsupported charset')
      err.status = 415
      next(err)
      return
    }

    // read
    read(req, res, next, parse, {
      encoding: charset,
      inflate: inflate,
      limit: limit,
      verify: verify
    })
  }
}

/**
 * Get the extended query parser.
 *
 * Uses the `parse` function of the `qs` module (https://github.com/hapijs/qs),
 * obtained through `parser('qs')`.
 *
 * @param {object} options `parameterLimit` (default 1000) bounds the number
 *   of parameters; finite values are truncated to an integer
 * @return {function} queryparse(body); throws an Error with `status` 413
 *   when `overlimit` reports too many parameters
 * @throws {TypeError} when `parameterLimit` is NaN or lower than 1
 */

function extendedparser(options) {
  var maxParams = options.parameterLimit === undefined
    ? 1000
    : options.parameterLimit
  var parse = parser('qs')

  if (isNaN(maxParams) || maxParams < 1) {
    throw new TypeError('option parameterLimit must be a positive number')
  }

  // Drop any fractional part; Infinity is kept as is.
  if (isFinite(maxParams)) {
    maxParams |= 0
  }

  return function queryparse(body) {
    if (!overlimit(body, maxParams)) {
      return parse(body, {parameterLimit: maxParams})
    }

    var err = new Error('too many parameters')
    err.status = 413
    throw err
  }
}

/**
 * Determine if the parameter count is over the limit.
 *
 * Counts `&` separators in the body and stops as soon as their number
 * equals `limit`, which means the body holds more than `limit` parameters.
 *
 * @param {string} body
 * @param {number} limit
 * @return {boolean} always `false` when `limit` is `Infinity`
 * @api private
 */

function overlimit(body, limit) {
  if (limit === Infinity) {
    return false
  }

  var separators = 0

  for (var at = body.indexOf('&'); at !== -1; at = body.indexOf('&', at + 1)) {
    separators += 1

    if (separators === limit) {
      return true
    }
  }

  return false
}

/**
 * Get parser for module name dynamically.
 *
 * @param {string} name
 * @return {function}
 * @api private
 */

function parser(name) {
  var mod = parsers[name]

  if (mod) {
    return mod.parse
  }

  // load module
  mod = parsers[name] = require(name)

  return mod.parse
}

/**
 * Get the simple query parser.
 *
 * Uses the `parse` function of the Node.js core `querystring` module
 * (http://nodejs.org/api/querystring.html), obtained through
 * `parser('querystring')`.
 *
 * @param {object} options `parameterLimit` (default 1000) bounds the number
 *   of parameters and is passed on as `maxKeys`; finite values are truncated
 *   to an integer
 * @return {function} queryparse(body); throws an Error with `status` 413
 *   when `overlimit` reports too many parameters
 * @throws {TypeError} when `parameterLimit` is NaN or lower than 1
 */

function simpleparser(options) {
  var maxParams = options.parameterLimit === undefined
    ? 1000
    : options.parameterLimit
  var parse = parser('querystring')

  if (isNaN(maxParams) || maxParams < 1) {
    throw new TypeError('option parameterLimit must be a positive number')
  }

  // Drop any fractional part; Infinity is kept as is.
  if (isFinite(maxParams)) {
    maxParams |= 0
  }

  return function queryparse(body) {
    if (!overlimit(body, maxParams)) {
      // Separator and assignment characters are left at their defaults.
      return parse(body, undefined, undefined, {maxKeys: maxParams})
    }

    var err = new Error('too many parameters')
    err.status = 413
    throw err
  }
}

半瓶

OrangeCLK

expressjs/body-parser 源码分析