From 465e05c0c5f63d48b3257b0a42e7739a235aa775 Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Thu, 17 Aug 2023 00:55:32 +0200 Subject: [PATCH] lib.fileset.toSource: init --- .github/CODEOWNERS | 2 + doc/default.nix | 1 + doc/functions.md | 1 + doc/functions/fileset.section.md | 51 +++++ lib/README.md | 3 + lib/default.nix | 1 + lib/fileset/README.md | 183 ++++++++++++++++ lib/fileset/benchmark.sh | 94 +++++++++ lib/fileset/default.nix | 131 ++++++++++++ lib/fileset/internal.nix | 274 ++++++++++++++++++++++++ lib/fileset/mock-splitRoot.nix | 26 +++ lib/fileset/tests.sh | 350 +++++++++++++++++++++++++++++++ lib/tests/release.nix | 6 +- 13 files changed, 1122 insertions(+), 1 deletion(-) create mode 100644 doc/functions/fileset.section.md create mode 100644 lib/fileset/README.md create mode 100755 lib/fileset/benchmark.sh create mode 100644 lib/fileset/default.nix create mode 100644 lib/fileset/internal.nix create mode 100644 lib/fileset/mock-splitRoot.nix create mode 100755 lib/fileset/tests.sh diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 45bc58a9def7..362ac707b496 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -29,6 +29,8 @@ /lib/debug.nix @edolstra @Profpatsch /lib/asserts.nix @edolstra @Profpatsch /lib/path.* @infinisil @fricklerhandwerk +/lib/fileset @infinisil +/doc/functions/fileset.section.md @infinisil # Nixpkgs Internals /default.nix @Ericson2314 diff --git a/doc/default.nix b/doc/default.nix index f4270ae856d5..300ddaf28bc4 100644 --- a/doc/default.nix +++ b/doc/default.nix @@ -19,6 +19,7 @@ let { name = "options"; description = "NixOS / nixpkgs option handling"; } { name = "path"; description = "path functions"; } { name = "filesystem"; description = "filesystem functions"; } + { name = "fileset"; description = "file set functions"; } { name = "sources"; description = "source filtering functions"; } { name = "cli"; description = "command-line serialization functions"; } ]; diff --git a/doc/functions.md b/doc/functions.md index 09033c9e3c19..551ba522a904 100644 --- a/doc/functions.md +++ b/doc/functions.md @@ -8,4 +8,5 @@ functions/generators.section.md functions/debug.section.md functions/prefer-remote-fetch.section.md functions/nix-gitignore.section.md +functions/fileset.section.md ``` diff --git a/doc/functions/fileset.section.md b/doc/functions/fileset.section.md new file mode 100644 index 000000000000..b24ebe26cc3b --- /dev/null +++ b/doc/functions/fileset.section.md @@ -0,0 +1,51 @@ + + +# File sets {#sec-fileset} + +The [`lib.fileset`](#sec-functions-library-fileset) library allows you to work with _file sets_. +A file set is a mathematical set of local files that can be added to the Nix store for use in Nix derivations. +File sets are easy and safe to use, providing obvious and composable semantics with good error messages to prevent mistakes. + +These sections apply to the entire library. +See the [function reference](#sec-functions-library-fileset) for function-specific documentation. + +The file set library is currently very limited but is being expanded to include more functions over time. + +## Implicit coercion from paths to file sets {#sec-fileset-path-coercion} + +All functions accepting file sets as arguments can also accept [paths](https://nixos.org/manual/nix/stable/language/values.html#type-path) as arguments. +Such path arguments are implicitly coerced to file sets containing all files under that path: +- A path to a file turns into a file set containing that single file. +- A path to a directory turns into a file set containing all files _recursively_ in that directory. + +If the path points to a non-existent location, an error is thrown. + +::: {.note} +Just like in Git, file sets cannot represent empty directories. +Because of this, a path to a directory that contains no files (recursively) will turn into a file set containing no files. +::: + +:::{.note} +File set coercion does _not_ add any of the files under the coerced paths to the store. +Only the [`toSource`](#function-library-lib.fileset.toSource) function adds files to the Nix store, and only those files contained in the `fileset` argument. +This is in contrast to using [paths in string interpolation](https://nixos.org/manual/nix/stable/language/values.html#type-path), which does add the entire referenced path to the store. +::: + +### Example {#sec-fileset-path-coercion-example} + +Assume we are in a local directory with a file hierarchy like this: +``` +├─ a/ +│ ├─ x (file) +│ └─ b/ +│   └─ y (file) +└─ c/ +   └─ d/ +``` + +Here's a listing of which files get included when different path expressions get coerced to file sets: +- `./.` as a file set contains both `a/x` and `a/b/y` (`c/` does not contain any files and is therefore omitted). +- `./a` as a file set contains both `a/x` and `a/b/y`. +- `./a/x` as a file set contains only `a/x`. +- `./a/b` as a file set contains only `a/b/y`. +- `./c` as a file set is empty, since neither `c` nor `c/d` contain any files. diff --git a/lib/README.md b/lib/README.md index ac7cbd4330ad..627086843db7 100644 --- a/lib/README.md +++ b/lib/README.md @@ -70,4 +70,7 @@ tests/filesystem.sh # Run the lib.path property tests path/tests/prop.sh + +# Run the lib.fileset tests +fileset/tests.sh ``` diff --git a/lib/default.nix b/lib/default.nix index 73b8ad871544..df92cb903916 100644 --- a/lib/default.nix +++ b/lib/default.nix @@ -54,6 +54,7 @@ let # Eval-time filesystem handling path = callLibs ./path; filesystem = callLibs ./filesystem.nix; + fileset = callLibs ./fileset; sources = callLibs ./sources.nix; # back-compat aliases diff --git a/lib/fileset/README.md b/lib/fileset/README.md new file mode 100644 index 000000000000..dbb591a4c8c8 --- /dev/null +++ b/lib/fileset/README.md @@ -0,0 +1,183 @@ +# File set library + +The main goal of the file set library is to be able to select local files that should be added to the Nix store. +It should have the following properties: +- Easy: + The functions should have obvious semantics, be low in number and be composable. +- Safe: + Throw early and helpful errors when mistakes are detected. +- Lazy: + Only compute values when necessary. + +Non-goals are: +- Efficient: + If the abstraction proves itself worthwhile but too slow, it can be still be optimized further. + +## Tests + +Tests are declared in [`tests.sh`](./tests.sh) and can be run using +``` +./tests.sh +``` + +## Benchmark + +A simple benchmark against the HEAD commit can be run using +``` +./benchmark.sh HEAD +``` + +This is intended to be run manually and is not checked by CI. + +## Internal representation + +The internal representation is versioned in order to allow file sets from different Nixpkgs versions to be composed with each other, see [`internal.nix`](./internal.nix) for the versions and conversions between them. +This section describes only the current representation, but past versions will have to be supported by the code. + +### `fileset` + +An attribute set with these values: + +- `_type` (constant string `"fileset"`): + Tag to indicate this value is a file set. + +- `_internalVersion` (constant string equal to the current version): + Version of the representation + +- `_internalBase` (path): + Any files outside of this path cannot influence the set of files. + This is always a directory. + +- `_internalTree` ([filesetTree](#filesettree)): + A tree representation of all included files under `_internalBase`. + +- `__noEval` (error): + An error indicating that directly evaluating file sets is not supported. + +## `filesetTree` + +One of the following: + +- `{ = filesetTree; }`: + A directory with a nested `filesetTree` value for every directory entry. + Even entries that aren't included are present as `null` because it improves laziness and allows using this as a sort of `builtins.readDir` cache. + +- `"directory"`: + A directory with all its files included recursively, allowing early cutoff for some operations. + This specific string is chosen to be compatible with `builtins.readDir` for a simpler implementation. + +- `"regular"`, `"symlink"`, `"unknown"` or any other non-`"directory"` string: + A nested file with its file type. + These specific strings are chosen to be compatible with `builtins.readDir` for a simpler implementation. + Distinguishing between different file types is not strictly necessary for the functionality this library, + but it does allow nicer printing of file sets. + +- `null`: + A file or directory that is excluded from the tree. + It may still exist on the file system. + +## API design decisions + +This section justifies API design decisions. + +### Internal structure + +The representation of the file set data type is internal and can be changed over time. + +Arguments: +- (+) The point of this library is to provide high-level functions, users don't need to be concerned with how it's implemented +- (+) It allows adjustments to the representation, which is especially useful in the early days of the library. +- (+) It still allows the representation to be stabilized later if necessary and if it has proven itself + +### Influence tracking + +File set operations internally track the top-most directory that could influence the exact contents of a file set. +Specifically, `toSource` requires that the given `fileset` is completely determined by files within the directory specified by the `root` argument. +For example, even with `dir/file.txt` being the only file in `./.`, `toSource { root = ./dir; fileset = ./.; }` gives an error. +This is because `fileset` may as well be the result of filtering `./.` in a way that excludes `dir`. + +Arguments: +- (+) This gives us the guarantee that adding new files to a project never breaks a file set expression. + This is also true in a lesser form for removed files: + only removing files explicitly referenced by paths can break a file set expression. +- (+) This can be removed later, if we discover it's too restrictive +- (-) It leads to errors when a sensible result could sometimes be returned, such as in the above example. + +### Empty directories + +File sets can only represent a _set_ of local files, directories on their own are not representable. + +Arguments: +- (+) There does not seem to be a sensible set of combinators when directories can be represented on their own. + Here's some possibilities: + - `./.` represents the files in `./.` _and_ the directory itself including its subdirectories, meaning that even if there's no files, the entire structure of `./.` is preserved + + In that case, what should `fileFilter (file: false) ./.` return? + It could return the entire directory structure unchanged, but with all files removed, which would not be what one would expect. + + Trying to have a filter function that also supports directories will lead to the question of: + What should the behavior be if `./foo` itself is excluded but all of its contents are included? + It leads to having to define when directories are recursed into, but then we're effectively back at how the `builtins.path`-based filters work. + + - `./.` represents all files in `./.` _and_ the directory itself, but not its subdirectories, meaning that at least `./.` will be preserved even if it's empty. + + In that case, `intersect ./. ./foo` should only include files and no directories themselves, since `./.` includes only `./.` as a directory, and same for `./foo`, so there's no overlap in directories. + But intuitively this operation should result in the same as `./foo` – everything else is just confusing. +- (+) This matches how Git only supports files, so developers should already be used to it. +- (-) Empty directories (even if they contain nested directories) are neither representable nor preserved when coercing from paths. + - (+) It is very rare that empty directories are necessary. + - (+) We can implement a workaround, allowing `toSource` to take an extra argument for ensuring certain extra directories exist in the result. +- (-) It slows down store imports, since the evaluator needs to traverse the entire tree to remove any empty directories + - (+) This can still be optimized by introducing more Nix builtins if necessary + +### String paths + +File sets do not support Nix store paths in strings such as `"/nix/store/...-source"`. + +Arguments: +- (+) Such paths are usually produced by derivations, which means `toSource` would either: + - Require IFD if `builtins.path` is used as the underlying primitive + - Require importing the entire `root` into the store such that derivations can be used to do the filtering +- (+) The convenient path coercion like `union ./foo ./bar` wouldn't work for absolute paths, requiring more verbose alternate interfaces: + - `let root = "/nix/store/...-source"; in union "${root}/foo" "${root}/bar"` + + Verbose and dangerous because if `root` was a path, the entire path would get imported into the store. + + - `toSource { root = "/nix/store/...-source"; fileset = union "./foo" "./bar"; }` + + Does not allow debug printing intermediate file set contents, since we don't know the paths contents before having a `root`. + + - `let fs = lib.fileset.withRoot "/nix/store/...-source"; in fs.union "./foo" "./bar"` + + Makes library functions impure since they depend on the contextual root path, questionable composability. + +- (+) The point of the file set abstraction is to specify which files should get imported into the store. + + This use case makes little sense for files that are already in the store. + This should be a separate abstraction as e.g. `pkgs.drvLayout` instead, which could have a similar interface but be specific to derivations. + Additional capabilities could be supported that can't be done at evaluation time, such as renaming files, creating new directories, setting executable bits, etc. + +### Single files + +File sets cannot add single files to the store, they can only import files under directories. + +Arguments: +- (+) There's no point in using this library for a single file, since you can't do anything other than add it to the store or not. + And it would be unclear how the library should behave if the one file wouldn't be added to the store: + `toSource { root = ./file.nix; fileset = ; }` has no reasonable result because returing an empty store path wouldn't match the file type, and there's no way to have an empty file store path, whatever that would mean. + +## To update in the future + +Here's a list of places in the library that need to be updated in the future: +- > The file set library is currently very limited but is being expanded to include more functions over time. + + in [the manual](../../doc/functions/fileset.section.md) +- > Currently the only way to construct file sets is using implicit coercion from paths. + + in [the `toSource` reference](./default.nix) +- > For now filesets are always paths + + in [the `toSource` implementation](./default.nix), also update the variable name there +- Once a tracing function exists, `__noEval` in [internal.nix](./internal.nix) should mention it +- If/Once a function to convert `lib.sources` values into file sets exists, the `_coerce` and `toSource` functions should be updated to mention that function in the error when such a value is passed +- If/Once a function exists that can optionally include a path depending on whether it exists, the error message for the path not existing in `_coerce` should mention the new function diff --git a/lib/fileset/benchmark.sh b/lib/fileset/benchmark.sh new file mode 100755 index 000000000000..f72686c4ab3f --- /dev/null +++ b/lib/fileset/benchmark.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env bash + +# Benchmarks lib.fileset +# Run: +# [nixpkgs]$ lib/fileset/benchmark.sh HEAD + +set -euo pipefail +shopt -s inherit_errexit dotglob + +if (( $# == 0 )); then + echo "Usage: $0 HEAD" + echo "Benchmarks the current tree against the HEAD commit. Any git ref will work." + exit 1 +fi +compareTo=$1 + +SCRIPT_FILE=$(readlink -f "${BASH_SOURCE[0]}") +SCRIPT_DIR=$(dirname "$SCRIPT_FILE") + +nixpkgs=$(cd "$SCRIPT_DIR/../.."; pwd) + +tmp="$(mktemp -d)" +clean_up() { + rm -rf "$tmp" +} +trap clean_up EXIT SIGINT SIGTERM +work="$tmp/work" +mkdir "$work" +cd "$work" + +# Create a fairly populated tree +touch f{0..5} +mkdir d{0..5} +mkdir e{0..5} +touch d{0..5}/f{0..5} +mkdir -p d{0..5}/d{0..5} +mkdir -p e{0..5}/e{0..5} +touch d{0..5}/d{0..5}/f{0..5} +mkdir -p d{0..5}/d{0..5}/d{0..5} +mkdir -p e{0..5}/e{0..5}/e{0..5} +touch d{0..5}/d{0..5}/d{0..5}/f{0..5} +mkdir -p d{0..5}/d{0..5}/d{0..5}/d{0..5} +mkdir -p e{0..5}/e{0..5}/e{0..5}/e{0..5} +touch d{0..5}/d{0..5}/d{0..5}/d{0..5}/f{0..5} + +bench() { + NIX_PATH=nixpkgs=$1 NIX_SHOW_STATS=1 NIX_SHOW_STATS_PATH=$tmp/stats.json \ + nix-instantiate --eval --strict --show-trace >/dev/null \ + --expr '(import ).fileset.toSource { root = ./.; fileset = ./.; }' + cat "$tmp/stats.json" +} + +echo "Running benchmark on index" >&2 +bench "$nixpkgs" > "$tmp/new.json" +( + echo "Checking out $compareTo" >&2 + git -C "$nixpkgs" worktree add --quiet "$tmp/worktree" "$compareTo" + trap 'git -C "$nixpkgs" worktree remove "$tmp/worktree"' EXIT + echo "Running benchmark on $compareTo" >&2 + bench "$tmp/worktree" > "$tmp/old.json" +) + +declare -a stats=( + ".envs.elements" + ".envs.number" + ".gc.totalBytes" + ".list.concats" + ".list.elements" + ".nrFunctionCalls" + ".nrLookups" + ".nrOpUpdates" + ".nrPrimOpCalls" + ".nrThunks" + ".sets.elements" + ".sets.number" + ".symbols.number" + ".values.number" +) + +different=0 +for stat in "${stats[@]}"; do + oldValue=$(jq "$stat" "$tmp/old.json") + newValue=$(jq "$stat" "$tmp/new.json") + if (( oldValue != newValue )); then + percent=$(bc <<< "scale=100; result = 100/$oldValue*$newValue; scale=4; result / 1") + if (( oldValue < newValue )); then + echo -e "Statistic $stat ($newValue) is \e[0;31m$percent% (+$(( newValue - oldValue )))\e[0m of the old value $oldValue" >&2 + else + echo -e "Statistic $stat ($newValue) is \e[0;32m$percent% (-$(( oldValue - newValue )))\e[0m of the old value $oldValue" >&2 + fi + (( different++ )) || true + fi +done +echo "$different stats differ between the current tree and $compareTo" diff --git a/lib/fileset/default.nix b/lib/fileset/default.nix new file mode 100644 index 000000000000..b30125265520 --- /dev/null +++ b/lib/fileset/default.nix @@ -0,0 +1,131 @@ +{ lib }: +let + + inherit (import ./internal.nix { inherit lib; }) + _coerce + _toSourceFilter + ; + + inherit (builtins) + isPath + pathExists + typeOf + ; + + inherit (lib.path) + hasPrefix + splitRoot + ; + + inherit (lib.strings) + isStringLike + ; + + inherit (lib.filesystem) + pathType + ; + + inherit (lib.sources) + cleanSourceWith + ; + +in { + + /* + Add the local files contained in `fileset` to the store as a single [store path](https://nixos.org/manual/nix/stable/glossary#gloss-store-path) rooted at `root`. + + The result is the store path as a string-like value, making it usable e.g. as the `src` of a derivation, or in string interpolation: + ```nix + stdenv.mkDerivation { + src = lib.fileset.toSource { ... }; + # ... + } + ``` + + The name of the store path is always `source`. + + Type: + toSource :: { + root :: Path, + fileset :: FileSet, + } -> SourceLike + + Example: + # Import the current directory into the store but only include files under ./src + toSource { root = ./.; fileset = ./src; } + => "/nix/store/...-source" + + # The file set coerced from path ./bar could contain files outside the root ./foo, which is not allowed + toSource { root = ./foo; fileset = ./bar; } + => + + # The root has to be a local filesystem path + toSource { root = "/nix/store/...-source"; fileset = ./.; } + => + */ + toSource = { + /* + (required) The local directory [path](https://nixos.org/manual/nix/stable/language/values.html#type-path) that will correspond to the root of the resulting store path. + Paths in [strings](https://nixos.org/manual/nix/stable/language/values.html#type-string), including Nix store paths, cannot be passed as `root`. + `root` has to be a directory. + + +:::{.note} +Changing `root` only affects the directory structure of the resulting store path, it does not change which files are added to the store. +The only way to change which files get added to the store is by changing the `fileset` attribute. +::: + */ + root, + /* + (required) The file set whose files to import into the store. + Currently the only way to construct file sets is using [implicit coercion from paths](#sec-fileset-path-coercion). + If a directory does not recursively contain any file, it is omitted from the store path contents. + */ + fileset, + }: + let + # We cannot rename matched attribute arguments, so let's work around it with an extra `let in` statement + # For now filesets are always paths + filesetPath = fileset; + in + let + fileset = _coerce "lib.fileset.toSource: `fileset`" filesetPath; + rootFilesystemRoot = (splitRoot root).root; + filesetFilesystemRoot = (splitRoot fileset._internalBase).root; + in + if ! isPath root then + if isStringLike root then + throw '' + lib.fileset.toSource: `root` "${toString root}" is a string-like value, but it should be a path instead. + Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'' + else + throw '' + lib.fileset.toSource: `root` is of type ${typeOf root}, but it should be a path instead.'' + # Currently all Nix paths have the same filesystem root, but this could change in the future. + # See also ../path/README.md + else if rootFilesystemRoot != filesetFilesystemRoot then + throw '' + lib.fileset.toSource: Filesystem roots are not the same for `fileset` and `root` "${toString root}": + `root`: root "${toString rootFilesystemRoot}" + `fileset`: root "${toString filesetFilesystemRoot}" + Different roots are not supported.'' + else if ! pathExists root then + throw '' + lib.fileset.toSource: `root` ${toString root} does not exist.'' + else if pathType root != "directory" then + throw '' + lib.fileset.toSource: `root` ${toString root} is a file, but it should be a directory instead. Potential solutions: + - If you want to import the file into the store _without_ a containing directory, use string interpolation or `builtins.path` instead of this function. + - If you want to import the file into the store _with_ a containing directory, set `root` to the containing directory, such as ${toString (dirOf root)}, and set `fileset` to the file path.'' + else if ! hasPrefix root fileset._internalBase then + throw '' + lib.fileset.toSource: `fileset` could contain files in ${toString fileset._internalBase}, which is not under the `root` ${toString root}. Potential solutions: + - Set `root` to ${toString fileset._internalBase} or any directory higher up. This changes the layout of the resulting store path. + - Set `fileset` to a file set that cannot contain files outside the `root` ${toString root}. This could change the files included in the result.'' + else + cleanSourceWith { + name = "source"; + src = root; + filter = _toSourceFilter fileset; + }; +} diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix new file mode 100644 index 000000000000..eeaa7d96875e --- /dev/null +++ b/lib/fileset/internal.nix @@ -0,0 +1,274 @@ +{ lib ? import ../. }: +let + + inherit (builtins) + isAttrs + isPath + isString + pathExists + readDir + typeOf + split + ; + + inherit (lib.attrsets) + attrValues + mapAttrs + ; + + inherit (lib.filesystem) + pathType + ; + + inherit (lib.lists) + all + elemAt + length + ; + + inherit (lib.path) + append + splitRoot + ; + + inherit (lib.path.subpath) + components + ; + + inherit (lib.strings) + isStringLike + concatStringsSep + substring + stringLength + ; + +in +# Rare case of justified usage of rec: +# - This file is internal, so the return value doesn't matter, no need to make things overridable +# - The functions depend on each other +# - We want to expose all of these functions for easy testing +rec { + + # If you change the internal representation, make sure to: + # - Update this version + # - Adjust _coerce to also accept and coerce older versions + # - Update the description of the internal representation in ./README.md + _currentVersion = 0; + + # Create a fileset, see ./README.md#fileset + # Type: path -> filesetTree -> fileset + _create = base: tree: { + _type = "fileset"; + + _internalVersion = _currentVersion; + _internalBase = base; + _internalTree = tree; + + # Double __ to make it be evaluated and ordered first + __noEval = throw '' + lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.''; + }; + + # Coerce a value to a fileset, erroring when the value cannot be coerced. + # The string gives the context for error messages. + # Type: String -> Path -> fileset + _coerce = context: value: + if value._type or "" == "fileset" then + if value._internalVersion > _currentVersion then + throw '' + ${context} is a file set created from a future version of the file set library with a different internal representation: + - Internal version of the file set: ${toString value._internalVersion} + - Internal version of the library: ${toString _currentVersion} + Make sure to update your Nixpkgs to have a newer version of `lib.fileset`.'' + else + value + else if ! isPath value then + if isStringLike value then + throw '' + ${context} "${toString value}" is a string-like value, but it should be a path instead. + Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'' + else + throw '' + ${context} is of type ${typeOf value}, but it should be a path instead.'' + else if ! pathExists value then + throw '' + ${context} ${toString value} does not exist.'' + else + _singleton value; + + # Create a file set from a path. + # Type: Path -> fileset + _singleton = path: + let + type = pathType path; + in + if type == "directory" then + _create path type + else + # This turns a file path ./default.nix into a fileset with + # - _internalBase: ./. + # - _internalTree: { + # "default.nix" = ; + # # Other directory entries + # = null; + # } + # See ./README.md#single-files + _create (dirOf path) + (_nestTree + (dirOf path) + [ (baseNameOf path) ] + type + ); + + /* + Nest a filesetTree under some extra components, while filling out all the other directory entries that aren't included with null + + _nestTree ./. [ "foo" "bar" ] tree == { + foo = { + bar = tree; + = null; + } + = null; + } + + Type: Path -> [ String ] -> filesetTree -> filesetTree + */ + _nestTree = targetBase: extraComponents: tree: + let + recurse = index: focusPath: + if index == length extraComponents then + tree + else + mapAttrs (_: _: null) (readDir focusPath) + // { + ${elemAt extraComponents index} = recurse (index + 1) (append focusPath (elemAt extraComponents index)); + }; + in + recurse 0 targetBase; + + # Expand "directory" filesetTree representation to the equivalent { = filesetTree; } + # Type: Path -> filesetTree -> { = filesetTree; } + _directoryEntries = path: value: + if isAttrs value then + value + else + readDir path; + + /* + Simplify a filesetTree recursively: + - Replace all directories that have no files with `null` + This removes directories that would be empty + - Replace all directories with all files with `"directory"` + This speeds up the source filter function + + Note that this function is strict, it evaluates the entire tree + + Type: Path -> filesetTree -> filesetTree + */ + _simplifyTree = path: tree: + if tree == "directory" || isAttrs tree then + let + entries = _directoryEntries path tree; + simpleSubtrees = mapAttrs (name: _simplifyTree (path + "/${name}")) entries; + subtreeValues = attrValues simpleSubtrees; + in + # This triggers either when all files in a directory are filtered out + # Or when the directory doesn't contain any files at all + if all isNull subtreeValues then + null + # Triggers when we have the same as a `readDir path`, so we can turn it back into an equivalent "directory". + else if all isString subtreeValues then + "directory" + else + simpleSubtrees + else + tree; + + # Turn a fileset into a source filter function suitable for `builtins.path` + # Only directories recursively containing at least one files are recursed into + # Type: Path -> fileset -> (String -> String -> Bool) + _toSourceFilter = fileset: + let + # Simplify the tree, necessary to make sure all empty directories are null + # which has the effect that they aren't included in the result + tree = _simplifyTree fileset._internalBase fileset._internalTree; + + # Decompose the base into its components + # See ../path/README.md for why we're not just using `toString` + baseComponents = components (splitRoot fileset._internalBase).subpath; + + # The base path as a string with a single trailing slash + baseString = + if baseComponents == [] then + # Need to handle the filesystem root specially + "/" + else + "/" + concatStringsSep "/" baseComponents + "/"; + + baseLength = stringLength baseString; + + # Check whether a list of path components under the base path exists in the tree. + # This function is called often, so it should be fast. + # Type: [ String ] -> Bool + inTree = components: + let + recurse = index: localTree: + if isAttrs localTree then + # We have an attribute set, meaning this is a directory with at least one file + if index >= length components then + # The path may have no more components though, meaning the filter is running on the directory itself, + # so we always include it, again because there's at least one file in it. + true + else + # If we do have more components, the filter runs on some entry inside this directory, so we need to recurse + # We do +2 because builtins.split is an interleaved list of the inbetweens and the matches + recurse (index + 2) localTree.${elemAt components index} + else + # If it's not an attribute set it can only be either null (in which case it's not included) + # or a string ("directory" or "regular", etc.) in which case it's included + localTree != null; + in recurse 0 tree; + + # Filter suited when there's no files + empty = _: _: false; + + # Filter suited when there's some files + # This can't be used for when there's no files, because the base directory is always included + nonEmpty = + path: _: + let + # Add a slash to the path string, turning "/foo" to "/foo/", + # making sure to not have any false prefix matches below. + # Note that this would produce "//" for "/", + # but builtins.path doesn't call the filter function on the `path` argument itself, + # meaning this function can never receive "/" as an argument + pathSlash = path + "/"; + in + # Same as `hasPrefix pathSlash baseString`, but more efficient. + # With base /foo/bar we need to include /foo: + # hasPrefix "/foo/" "/foo/bar/" + if substring 0 (stringLength pathSlash) baseString == pathSlash then + true + # Same as `! hasPrefix baseString pathSlash`, but more efficient. + # With base /foo/bar we need to exclude /baz + # ! hasPrefix "/baz/" "/foo/bar/" + else if substring 0 baseLength pathSlash != baseString then + false + else + # Same as `removePrefix baseString path`, but more efficient. + # From the above code we know that hasPrefix baseString pathSlash holds, so this is safe. + # We don't use pathSlash here because we only needed the trailing slash for the prefix matching. + # With base /foo and path /foo/bar/baz this gives + # inTree (split "/" (removePrefix "/foo/" "/foo/bar/baz")) + # == inTree (split "/" "bar/baz") + # == inTree [ "bar" "baz" ] + inTree (split "/" (substring baseLength (-1) path)); + in + # Special case because the code below assumes that the _internalBase is always included in the result + # which shouldn't be done when we have no files at all in the base + if tree == null then + empty + else + nonEmpty; + +} diff --git a/lib/fileset/mock-splitRoot.nix b/lib/fileset/mock-splitRoot.nix new file mode 100644 index 000000000000..3c18ab1b1afd --- /dev/null +++ b/lib/fileset/mock-splitRoot.nix @@ -0,0 +1,26 @@ +# This overlay implements mocking of the lib.path.splitRoot function +# It pretends that the last component named "mock-root" is the root: +# +# splitRoot /foo/mock-root/bar/mock-root/baz +# => { +# root = /foo/mock-root/bar/mock-root; +# subpath = "./baz"; +# } +self: super: { + path = super.path // { + splitRoot = path: + let + parts = super.path.splitRoot path; + components = self.path.subpath.components parts.subpath; + count = self.length components; + rootIndex = count - self.lists.findFirstIndex + (component: component == "mock-root") + (self.length components) + (self.reverseList components); + root = self.path.append parts.root (self.path.subpath.join (self.take rootIndex components)); + subpath = self.path.subpath.join (self.drop rootIndex components); + in { + inherit root subpath; + }; + }; +} diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh new file mode 100755 index 000000000000..9492edf4f55e --- /dev/null +++ b/lib/fileset/tests.sh @@ -0,0 +1,350 @@ +#!/usr/bin/env bash + +# Tests lib.fileset +# Run: +# [nixpkgs]$ lib/fileset/tests.sh +# or: +# [nixpkgs]$ nix-build lib/tests/release.nix + +set -euo pipefail +shopt -s inherit_errexit dotglob + +die() { + # The second to last entry contains the line number of the top-level caller + lineIndex=$(( ${#BASH_LINENO[@]} - 2 )) + echo >&2 -e "test case at ${BASH_SOURCE[0]}:${BASH_LINENO[$lineIndex]} failed:" "$@" + exit 1 +} + +if test -n "${TEST_LIB:-}"; then + NIX_PATH=nixpkgs="$(dirname "$TEST_LIB")" +else + NIX_PATH=nixpkgs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.."; pwd)" +fi +export NIX_PATH + +tmp="$(mktemp -d)" +clean_up() { + rm -rf "$tmp" +} +trap clean_up EXIT SIGINT SIGTERM +work="$tmp/work" +mkdir "$work" +cd "$work" + +# Crudely unquotes a JSON string by just taking everything between the first and the second quote. +# We're only using this for resulting /nix/store paths, which can't contain " anyways, +# nor can they contain any other characters that would need to be escaped specially in JSON +# This way we don't need to add a dependency on e.g. jq +crudeUnquoteJSON() { + cut -d \" -f2 +} + +prefixExpression='let + lib = import ; + internal = import { + inherit lib; + }; +in +with lib; +with internal; +with lib.fileset;' + +# Check that a nix expression evaluates successfully (strictly, coercing to json, read-write-mode). +# The expression has `lib.fileset` in scope. +# If a second argument is provided, the result is checked against it as a regex. +# Otherwise, the result is output. +# Usage: expectSuccess NIX [REGEX] +expectSuccess() { + local expr=$1 + if [[ "$#" -gt 1 ]]; then + local expectedResultRegex=$2 + fi + if ! result=$(nix-instantiate --eval --strict --json --read-write-mode --show-trace \ + --expr "$prefixExpression $expr"); then + die "$expr failed to evaluate, but it was expected to succeed" + fi + if [[ -v expectedResultRegex ]]; then + if [[ ! "$result" =~ $expectedResultRegex ]]; then + die "$expr should have evaluated to this regex pattern:\n\n$expectedResultRegex\n\nbut this was the actual result:\n\n$result" + fi + else + echo "$result" + fi +} + +# Check that a nix expression fails to evaluate (strictly, coercing to json, read-write-mode). +# And check the received stderr against a regex +# The expression has `lib.fileset` in scope. +# Usage: expectFailure NIX REGEX +expectFailure() { + local expr=$1 + local expectedErrorRegex=$2 + if result=$(nix-instantiate --eval --strict --json --read-write-mode --show-trace 2>"$tmp/stderr" \ + --expr "$prefixExpression $expr"); then + die "$expr evaluated successfully to $result, but it was expected to fail" + fi + stderr=$(<"$tmp/stderr") + if [[ ! "$stderr" =~ $expectedErrorRegex ]]; then + die "$expr should have errored with this regex pattern:\n\n$expectedErrorRegex\n\nbut this was the actual error:\n\n$stderr" + fi +} + +# We conditionally use inotifywait in checkFileset. +# Check early whether it's available +# TODO: Darwin support, though not crucial since we have Linux CI +if type inotifywait 2>/dev/null >/dev/null; then + canMonitorFiles=1 +else + echo "Warning: Not checking that excluded files don't get accessed since inotifywait is not available" >&2 + canMonitorFiles= +fi + +# Check whether a file set includes/excludes declared paths as expected, usage: +# +# tree=( +# [a/b] =1 # Declare that file a/b should exist and expect it to be included in the store path +# [c/a] = # Declare that file c/a should exist and expect it to be excluded in the store path +# [c/d/]= # Declare that directory c/d/ should exist and expect it to be excluded in the store path +# ) +# checkFileset './a' # Pass the fileset as the argument +declare -A tree +checkFileset() ( + # New subshell so that we can have a separate trap handler, see `trap` below + local fileset=$1 + + # Process the tree into separate arrays for included paths, excluded paths and excluded files. + # Also create all the paths in the local directory + local -a included=() + local -a excluded=() + local -a excludedFiles=() + for p in "${!tree[@]}"; do + # If keys end with a `/` we treat them as directories, otherwise files + if [[ "$p" =~ /$ ]]; then + mkdir -p "$p" + isFile= + else + mkdir -p "$(dirname "$p")" + touch "$p" + isFile=1 + fi + case "${tree[$p]}" in + 1) + included+=("$p") + ;; + 0) + excluded+=("$p") + if [[ -n "$isFile" ]]; then + excludedFiles+=("$p") + fi + ;; + *) + die "Unsupported tree value: ${tree[$p]}" + esac + done + + # Start inotifywait in the background to monitor all excluded files (if any) + if [[ -n "$canMonitorFiles" ]] && (( "${#excludedFiles[@]}" != 0 )); then + coproc watcher { + # inotifywait outputs a string on stderr when ready + # Redirect it to stdout so we can access it from the coproc's stdout fd + # exec so that the coprocess is inotify itself, making the kill below work correctly + # See below why we listen to both open and delete_self events + exec inotifywait --format='%e %w' --event open,delete_self --monitor "${excludedFiles[@]}" 2>&1 + } + # This will trigger when this subshell exits, no matter if successful or not + # After exiting the subshell, the parent shell will continue executing + trap 'kill "${watcher_PID}"' exit + + # Synchronously wait until inotifywait is ready + while read -r -u "${watcher[0]}" line && [[ "$line" != "Watches established." ]]; do + : + done + fi + + # Call toSource with the fileset, triggering open events for all files that are added to the store + expression="toSource { root = ./.; fileset = $fileset; }" + # crudeUnquoteJSON is safe because we get back a store path in a string + storePath=$(expectSuccess "$expression" | crudeUnquoteJSON) + + # Remove all files immediately after, triggering delete_self events for all of them + rm -rf -- * + + # Only check for the inotify events if we actually started inotify earlier + if [[ -v watcher ]]; then + # Get the first event + read -r -u "${watcher[0]}" event file + + # There's only these two possible event timelines: + # - open, ..., open, delete_self, ..., delete_self: If some excluded files were read + # - delete_self, ..., delete_self: If no excluded files were read + # So by looking at the first event we can figure out which one it is! + case "$event" in + OPEN) + die "$expression opened excluded file $file when it shouldn't have" + ;; + DELETE_SELF) + # Expected events + ;; + *) + die "Unexpected event type '$event' on file $file that should be excluded" + ;; + esac + fi + + # For each path that should be included, make sure it does occur in the resulting store path + for p in "${included[@]}"; do + if [[ ! -e "$storePath/$p" ]]; then + die "$expression doesn't include path $p when it should have" + fi + done + + # For each path that should be excluded, make sure it doesn't occur in the resulting store path + for p in "${excluded[@]}"; do + if [[ -e "$storePath/$p" ]]; then + die "$expression included path $p when it shouldn't have" + fi + done +) + + +#### Error messages ##### + +# Absolute paths in strings cannot be passed as `root` +expectFailure 'toSource { root = "/nix/store/foobar"; fileset = ./.; }' 'lib.fileset.toSource: `root` "/nix/store/foobar" is a string-like value, but it should be a path instead. +\s*Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.' + +# Only paths are accepted as `root` +expectFailure 'toSource { root = 10; fileset = ./.; }' 'lib.fileset.toSource: `root` is of type int, but it should be a path instead.' + +# Different filesystem roots in root and fileset are not supported +mkdir -p {foo,bar}/mock-root +expectFailure 'with ((import ).extend (import )).fileset; + toSource { root = ./foo/mock-root; fileset = ./bar/mock-root; } +' 'lib.fileset.toSource: Filesystem roots are not the same for `fileset` and `root` "'"$work"'/foo/mock-root": +\s*`root`: root "'"$work"'/foo/mock-root" +\s*`fileset`: root "'"$work"'/bar/mock-root" +\s*Different roots are not supported.' +rm -rf * + +# `root` needs to exist +expectFailure 'toSource { root = ./a; fileset = ./.; }' 'lib.fileset.toSource: `root` '"$work"'/a does not exist.' + +# `root` needs to be a file +touch a +expectFailure 'toSource { root = ./a; fileset = ./a; }' 'lib.fileset.toSource: `root` '"$work"'/a is a file, but it should be a directory instead. Potential solutions: +\s*- If you want to import the file into the store _without_ a containing directory, use string interpolation or `builtins.path` instead of this function. +\s*- If you want to import the file into the store _with_ a containing directory, set `root` to the containing directory, such as '"$work"', and set `fileset` to the file path.' +rm -rf * + +# Only paths under `root` should be able to influence the result +mkdir a +expectFailure 'toSource { root = ./a; fileset = ./.; }' 'lib.fileset.toSource: `fileset` could contain files in '"$work"', which is not under the `root` '"$work"'/a. Potential solutions: +\s*- Set `root` to '"$work"' or any directory higher up. This changes the layout of the resulting store path. +\s*- Set `fileset` to a file set that cannot contain files outside the `root` '"$work"'/a. This could change the files included in the result.' +rm -rf * + +# Path coercion only works for paths +expectFailure 'toSource { root = ./.; fileset = 10; }' 'lib.fileset.toSource: `fileset` is of type int, but it should be a path instead.' +expectFailure 'toSource { root = ./.; fileset = "/some/path"; }' 'lib.fileset.toSource: `fileset` "/some/path" is a string-like value, but it should be a path instead. +\s*Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.' + +# Path coercion errors for non-existent paths +expectFailure 'toSource { root = ./.; fileset = ./a; }' 'lib.fileset.toSource: `fileset` '"$work"'/a does not exist.' + +# File sets cannot be evaluated directly +expectFailure '_create ./. null' 'lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.' + +# Future versions of the internal representation are unsupported +expectFailure '_coerce ": value" { _type = "fileset"; _internalVersion = 1; }' ': value is a file set created from a future version of the file set library with a different internal representation: +\s*- Internal version of the file set: 1 +\s*- Internal version of the library: 0 +\s*Make sure to update your Nixpkgs to have a newer version of `lib.fileset`.' + +# _create followed by _coerce should give the inputs back without any validation +expectSuccess '{ + inherit (_coerce "" (_create "base" "tree")) + _internalVersion _internalBase _internalTree; +}' '\{"_internalBase":"base","_internalTree":"tree","_internalVersion":0\}' + +#### Resulting store path #### + +# The store path name should be "source" +expectSuccess 'toSource { root = ./.; fileset = ./.; }' '"'"${NIX_STORE_DIR:-/nix/store}"'/.*-source"' + +# We should be able to import an empty directory and end up with an empty result +tree=( +) +checkFileset './.' + +# Directories recursively containing no files are not included +tree=( + [e/]=0 + [d/e/]=0 + [d/d/e/]=0 + [d/d/f]=1 + [d/f]=1 + [f]=1 +) +checkFileset './.' + +# Check trees that could cause a naïve string prefix checking implementation to fail +tree=( + [a]=0 + [ab/x]=0 + [ab/xy]=1 + [ab/xyz]=0 + [abc]=0 +) +checkFileset './ab/xy' + +# Check path coercion examples in ../../doc/functions/fileset.section.md +tree=( + [a/x]=1 + [a/b/y]=1 + [c/]=0 + [c/d/]=0 +) +checkFileset './.' + +tree=( + [a/x]=1 + [a/b/y]=1 + [c/]=0 + [c/d/]=0 +) +checkFileset './a' + +tree=( + [a/x]=1 + [a/b/y]=0 + [c/]=0 + [c/d/]=0 +) +checkFileset './a/x' + +tree=( + [a/x]=0 + [a/b/y]=1 + [c/]=0 + [c/d/]=0 +) +checkFileset './a/b' + +tree=( + [a/x]=0 + [a/b/y]=0 + [c/]=0 + [c/d/]=0 +) +checkFileset './c' + +# Test the source filter for the somewhat special case of files in the filesystem root +# We can't easily test this with the above functions because we can't write to the filesystem root and we don't want to make any assumptions which files are there in the sandbox +expectSuccess '_toSourceFilter (_create /. null) "/foo" ""' 'false' +expectSuccess '_toSourceFilter (_create /. { foo = "regular"; }) "/foo" ""' 'true' +expectSuccess '_toSourceFilter (_create /. { foo = null; }) "/foo" ""' 'false' + +# TODO: Once we have combinators and a property testing library, derive property tests from https://en.wikipedia.org/wiki/Algebra_of_sets + +echo >&2 tests ok diff --git a/lib/tests/release.nix b/lib/tests/release.nix index 805f7a7e95d6..c8d6b810122e 100644 --- a/lib/tests/release.nix +++ b/lib/tests/release.nix @@ -6,6 +6,7 @@ }: let + lib = import ../.; testWithNix = nix: pkgs.runCommand "nixpkgs-lib-tests-nix-${nix.version}" { buildInputs = [ @@ -24,7 +25,7 @@ let ]; nativeBuildInputs = [ nix - ]; + ] ++ lib.optional pkgs.stdenv.isLinux pkgs.inotify-tools; strictDeps = true; } '' datadir="${nix}/share" @@ -50,6 +51,9 @@ let echo "Running lib/tests/sources.sh" TEST_LIB=$PWD/lib bash lib/tests/sources.sh + echo "Running lib/fileset/tests.sh" + TEST_LIB=$PWD/lib bash lib/fileset/tests.sh + echo "Running lib/tests/systems.nix" [[ $(nix-instantiate --eval --strict lib/tests/systems.nix | tee /dev/stderr) == '[ ]' ]];