diff --git a/pycharm_plugin/src/main/kotlin/com/projspec/toolwindow/HtmlContent.kt b/pycharm_plugin/src/main/kotlin/com/projspec/toolwindow/HtmlContent.kt index 102ce2b..232018f 100644 --- a/pycharm_plugin/src/main/kotlin/com/projspec/toolwindow/HtmlContent.kt +++ b/pycharm_plugin/src/main/kotlin/com/projspec/toolwindow/HtmlContent.kt @@ -645,7 +645,7 @@ body { margin: 0; padding: 0; font-family: var(--vscode-font-family); color: var if (project.file_count != null && project.total_size != null) metaParts.push(parseInt(project.file_count).toLocaleString() + ' files, ' + fmtSize(project.total_size)); if (project.is_writable != null) - metaParts.push(project.is_writable === 'True' ? 'writable' : 'read-only'); + metaParts.push((project.is_writable === true || project.is_writable === 'True') ? 'writable' : 'read-only'); if (project.last_modified != null) { const age = fmtAge(project.last_modified); const by = project.last_modified_by != null ? project.last_modified_by : null; diff --git a/pycharm_plugin/src/main/kotlin/com/projspec/toolwindow/ProjspecToolWindowPanel.kt b/pycharm_plugin/src/main/kotlin/com/projspec/toolwindow/ProjspecToolWindowPanel.kt index 79a0a71..7a3fefa 100644 --- a/pycharm_plugin/src/main/kotlin/com/projspec/toolwindow/ProjspecToolWindowPanel.kt +++ b/pycharm_plugin/src/main/kotlin/com/projspec/toolwindow/ProjspecToolWindowPanel.kt @@ -433,7 +433,7 @@ class ProjspecToolWindowPanel( private fun rescan(url: String) { withBusy { val path = OpenWithHelper.urlToPath(url) - val res = ProjspecRunner.runScan(path) + val res = ProjspecRunner.runScan(path, entryStorageOptions(url)) if (res is CliResult.Failure) { Notifier.warning("projspec scan: ${res.message}", project) } @@ -441,6 +441,23 @@ class ProjspecToolWindowPanel( } } + /** + * The storage_options of the library entry for [url], serialised back to a + * JSON string (or null if absent/empty). Remote projects need these + * re-supplied when the Project is reconstructed on rescan, otherwise the + * filesystem access fails. + */ + private fun entryStorageOptions(url: String): String? { + return try { + @Suppress("UNCHECKED_CAST") + val proj = libraryMap[url] as? Map ?: return null + val so = proj["storage_options"] as? Map ?: return null + if (so.isEmpty()) null else gson.toJson(so) + } catch (_: Exception) { + null + } + } + /** * Show the create-spec modal — but first filter the known spec list by * what is *not* already present in the selected project. Mirrors the @@ -472,7 +489,7 @@ class ProjspecToolWindowPanel( if (createRes is CliResult.Failure) { Notifier.warning("projspec create: ${createRes.message}", project) } - ProjspecRunner.runScan(path) + ProjspecRunner.runScan(path, entryStorageOptions(url)) reload(initial = false) } } diff --git a/pycharm_plugin/src/main/kotlin/com/projspec/util/ProjspecRunner.kt b/pycharm_plugin/src/main/kotlin/com/projspec/util/ProjspecRunner.kt index babfed1..2238b20 100644 --- a/pycharm_plugin/src/main/kotlin/com/projspec/util/ProjspecRunner.kt +++ b/pycharm_plugin/src/main/kotlin/com/projspec/util/ProjspecRunner.kt @@ -32,8 +32,22 @@ object ProjspecRunner { /** `projspec library list --json-out` — returns project library JSON. */ fun runLibraryList(): CliResult = run(listOf(cli, "library", "list", "--json-out")) - /** `projspec scan --library ` — scan & register a directory. */ - fun runScan(path: String): CliResult = run(listOf(cli, "scan", "--library", path)) + /** + * `projspec scan --library ` — scan & register a directory. + * + * *storageOptions*, when non-blank, is forwarded as `--storage_options` + * (a JSON string) so remote projects (s3://, gcs://, …) can be re-scanned + * with their filesystem credentials/flags intact. + */ + fun runScan(path: String, storageOptions: String? = null): CliResult { + val args = mutableListOf(cli, "scan", "--library") + if (!storageOptions.isNullOrBlank()) { + args.add("--storage_options") + args.add(storageOptions) + } + args.add(path) + return run(args) + } /** `projspec create ` — create a new spec inside a project. */ fun runCreate(spec: String, path: String): CliResult = diff --git a/src/projspec/proj/base.py b/src/projspec/proj/base.py index 1766a51..511fcb1 100644 --- a/src/projspec/proj/base.py +++ b/src/projspec/proj/base.py @@ -137,6 +137,21 @@ def is_local(self) -> bool: # see also fsspec.utils.can_be_local for more flexibility with caching. return isinstance(self.fs, fsspec.implementations.local.LocalFileSystem) + @property + def display_url(self) -> str: + """Protocol-qualified URL for display. + + Works even when ``fs`` is ``None`` - e.g. a project deserialised in an + environment lacking the relevant fsspec backend - by falling back to + the stored (already protocol-qualified) path. + """ + if self.fs is not None: + try: + return self.fs.unstrip_protocol(self.url) + except Exception: + pass + return self.url or self.path + @cached_property def _tree_stats(self) -> dict: """Walk the directory tree and collect aggregate statistics. @@ -155,31 +170,49 @@ def _tree_stats(self) -> dict: best_mtime: float | None = None best_info: dict | None = None + def _excluded(name: str) -> bool: + return name in self.excludes or name.startswith((".", "_")) + try: for dirpath, subdirs, files in self.fs.walk( self.url, topdown=True, detail=True ): - # Prune excluded directories in-place (topdown=True makes this work) - # subdirs is a dict when detail=True + # Prune excluded directories in-place so walk() does not recurse + # into them (topdown=True makes in-place mutation effective). + # ``subdirs`` is a dict when the backend honours detail=True, but + # some backends ignore the flag and yield a plain list of names; + # handle both. if isinstance(subdirs, dict): - to_remove = [ - name - for name in list(subdirs) - if name in self.excludes or name.startswith((".", "_")) - ] - for name in to_remove: + for name in [n for n in list(subdirs) if _excluded(n)]: del subdirs[name] - file_infos = files.values() if isinstance(files, dict) else [] + elif isinstance(subdirs, list): + subdirs[:] = [n for n in subdirs if not _excluded(n)] + + # ``files`` is a {name: info} dict when detail is honoured, or a + # list of names otherwise. Normalise to an iterable of + # (name, info-or-None) so file counting works for both - the + # previous code silently counted zero files for list-yielding + # backends (a common cause of "remote project shows 0 files"). + if isinstance(files, dict): + file_items = list(files.items()) + elif isinstance(files, list): + file_items = [(name, None) for name in files] else: - # Some backends yield lists even with detail=True; skip pruning - file_infos = [] + file_items = [] - for finfo in file_infos: + for name, finfo in file_items: + # Resolve a detail dict: provided directly, or fetched + # lazily via info() when the backend only gave us a name. if not isinstance(finfo, dict): + full = f"{dirpath.rstrip('/')}/{name}" if name else dirpath + try: + finfo = self.fs.info(full) + except Exception: + finfo = {} + if finfo.get("type") == "directory": continue - size = finfo.get("size") or 0 file_count += 1 - total_size += size + total_size += finfo.get("size") or 0 mtime = finfo.get("mtime") or finfo.get("LastModified") if mtime is not None: # mtime may be a datetime; normalise to float @@ -347,6 +380,13 @@ def resolve( types = set(camel_to_snake(_) for _ in types or ()) if types and types - set(registry): raise ValueError(f"Unknown types: {set(types) - set(registry)}") + if self.fs is None: + # This project was deserialised without its fsspec backend + # available; we cannot read the filesystem to (re)scan it. + raise RuntimeError( + f"Cannot scan {self.path!r}: the required fsspec backend is not " + "installed in this environment. Install it and try again." + ) # record when this (re)scan happened self.scanned_at = time.time() # sorting to ensure consistency @@ -412,11 +452,7 @@ def basenames(self): def text_summary(self, bare=False) -> str: """Only shows project types, not what they contain""" - txt = ( - self.fs.unstrip_protocol(self.url) - if bare - else f"\n" - ) + txt = self.display_url if bare else f"\n" if not bare: txt += self._stats_line() + "\n" bits = [ @@ -459,11 +495,11 @@ def _stats_line(self) -> str: return " " + " · ".join(parts) if parts else "" def __repr__(self): - return f"" + return f"" def __str__(self): txt = "\n{}\n\n{}".format( - self.fs.unstrip_protocol(self.url), + self.display_url, self._stats_line(), "\n\n".join(str(_) for _ in self.specs.values()), ) @@ -571,10 +607,17 @@ def __contains__(self, item) -> bool: return item in self.specs or any(item in _ for _ in self.children.values()) def to_dict(self, compact=True) -> dict: + # Store the *protocol-qualified* URL (e.g. ``s3://bucket/key``, + # ``memory:///proj``) rather than the protocol-stripped ``self.path``. + # Otherwise deserialisation re-runs ``url_to_fs`` on a bare path and + # wrongly reconstructs a LocalFileSystem, so remote projects get + # interpreted as local (failing to scan / rescan). ``display_url`` + # also works when the fsspec backend is unavailable (``fs is None``). + url = self.display_url dic = AttrDict( specs=self.specs, children=self.children, - url=self.path, + url=url, storage_options=self.storage_options, artifacts=self.artifacts, contents=self.contents, @@ -614,7 +657,17 @@ def from_dict(dic): proj.artifacts = from_dict(dic["artifacts"], proj) proj.path = dic["url"] proj.storage_options = dic["storage_options"] - proj.fs, proj.url = fsspec.url_to_fs(proj.path, **proj.storage_options) + try: + proj.fs, proj.url = fsspec.url_to_fs(proj.path, **proj.storage_options) + except Exception: + # The fsspec backend for this URL may not be installed in the + # current environment (e.g. a library entry for ``s3://...`` loaded + # without ``s3fs``). The project must still be loadable and + # displayable from its cached metadata; only operations that need + # the live filesystem (rescan, file access) should fail. Leave + # ``fs`` unset and keep the (protocol-qualified) URL as-is. + proj.fs = None + proj.url = proj.path scanned_at = dic.get("scanned_at") try: proj.scanned_at = float(scanned_at) diff --git a/src/projspec/qtapp/main.py b/src/projspec/qtapp/main.py index 5f4724e..1a5a000 100644 --- a/src/projspec/qtapp/main.py +++ b/src/projspec/qtapp/main.py @@ -296,7 +296,7 @@ def _action_open_with(self, tool: str, url: str) -> None: _spawn_detached(["jupyter", "lab", local]) def _action_rescan(self, url: str) -> None: - self._scan_and_reload(url, walk=False) + self._rescan(url) def _action_create_spec(self, url: str) -> None: proj = library.entries.get(url) @@ -326,10 +326,12 @@ def _action_create_spec_confirmed(self, url: str, spec: str) -> None: self._set_busy(True) try: path = _url_to_local(url) - proj = projspec.Project(path, walk=False) + existing = library.entries.get(url) + storage_options = dict(getattr(existing, "storage_options", None) or {}) + proj = projspec.Project(path, walk=False, storage_options=storage_options) proj.create(spec) # Rescan and refresh. - fresh = projspec.Project(path, walk=False) + fresh = projspec.Project(path, walk=False, storage_options=storage_options) library.add_entry(path, fresh) self._reload() except Exception as e: @@ -396,11 +398,47 @@ def _action_reveal_file(self, fn: str) -> None: # ── Scan helper ───────────────────────────────────────────────────────── + def _rescan(self, url: str) -> None: + """Re-run ``Project(...)`` for an existing library entry and replace it. + + The original library key (*url*) is preserved so the entry's identity + does not drift (selection in the UI is keyed on it). + + The path used to rebuild the project must keep its protocol so remote + projects re-open against the right filesystem. We prefer the library + key itself when it already carries a protocol (e.g. + ``memory:///proj``, ``s3://bucket/key``) - it is the authoritative + protocol-qualified identifier the UI holds, and is reliable even when + an older serialised library reconstructed the entry's filesystem as + local. Otherwise we fall back to the stored project's + protocol-qualified URL. + """ + if not url: + return + self._set_busy(True) + try: + existing = library.entries.get(url) + storage_options = dict(getattr(existing, "storage_options", None) or {}) + path = _rescan_path(url, existing) + proj = projspec.Project(path, walk=False, storage_options=storage_options) + # Keep the original key so we update the entry in place rather than + # creating a duplicate under a differently-formatted key. + library.add_entry(url, proj) + self._reload() + except Exception as e: + QMessageBox.warning(self, "Rescan", f"Rescan failed: {e}") + finally: + self._set_busy(False) + def _scan_and_reload(self, url: str, walk: bool) -> None: self._set_busy(True) try: path = _url_to_local(url) if url.startswith("file://") else url - proj = projspec.Project(path, walk=walk) + # Re-supply storage_options from the existing library entry (if + # any) so rescanning a remote project keeps working. + existing = library.entries.get(url) + storage_options = dict(getattr(existing, "storage_options", None) or {}) + proj = projspec.Project(path, walk=walk, storage_options=storage_options) if walk: for child_url, child in (proj.children or {}).items(): if child.specs: @@ -426,6 +464,25 @@ def _url_to_local(url: str) -> str: return url +def _rescan_path(url: str, existing) -> str: + """Resolve the path to re-open *url* as a Project, preserving protocol. + + Prefers the library key *url* when it already carries a protocol (it is the + authoritative, protocol-qualified identifier and is reliable even if an + older serialised library reconstructed the entry's filesystem as local). + Otherwise falls back to the stored project's protocol-qualified URL, and + finally to the key itself. + """ + if "://" in url: + return url + if existing is not None: + try: + return existing.fs.unstrip_protocol(existing.url) + except Exception: + return getattr(existing, "path", url) or url + return url + + def _spawn_detached(cmd: list[str]) -> None: """Launch an external tool without blocking the Qt event loop.""" try: diff --git a/src/projspec/textapp/main.py b/src/projspec/textapp/main.py index bff7407..78cd02b 100644 --- a/src/projspec/textapp/main.py +++ b/src/projspec/textapp/main.py @@ -914,6 +914,19 @@ def _url_to_local(url: str) -> str: return url[len("file://") :] if url.startswith("file://") else url +def _entry_storage_options(url: str) -> str: + """Storage options of the library entry *url*, as a JSON string ("" if none). + + Remote projects need their ``storage_options`` re-supplied when the + ``Project`` is reconstructed on rescan, otherwise filesystem access fails. + """ + import json as _json + + proj = library.entries.get(url) + so = getattr(proj, "storage_options", None) or {} + return _json.dumps(so) if so else "" + + def _spawn_detached(cmd: list[str]) -> None: try: subprocess.Popen( @@ -1349,7 +1362,9 @@ def _cb(key: str | None) -> None: elif key == "openJupyter": _spawn_detached(["jupyter", "lab", _url_to_local(url)]) elif key == "rescan": - self._scan_and_reload(url, walk=False) + self._scan_and_reload( + url, walk=False, storage_options=_entry_storage_options(url) + ) elif key == "createSpec": self._open_create_spec(url) elif key == "remove": @@ -1378,9 +1393,11 @@ def _cb(pick: str | None) -> None: self._set_busy(True) try: path = _url_to_local(url) - proj = projspec.Project(path, walk=False) - proj.create(pick) - fresh = projspec.Project(path, walk=False) + proj = library.entries.get(url) + so = getattr(proj, "storage_options", None) or {} + new = projspec.Project(path, walk=False, storage_options=so) + new.create(pick) + fresh = projspec.Project(path, walk=False, storage_options=so) library.add_entry(path, fresh) self.status_message = f"Created {pick} in {path}" self._reload() diff --git a/src/projspec/utils.py b/src/projspec/utils.py index 64dd015..43fd446 100644 --- a/src/projspec/utils.py +++ b/src/projspec/utils.py @@ -99,7 +99,10 @@ def to_dict(obj, compact=True): ) for k, v in obj.items() } - if isinstance(obj, (bytes, str)): + # Preserve JSON-native scalar types as-is so they round-trip with the + # correct type (e.g. True -> true, not "True"). Note bool must be checked + # before/alongside int since bool is a subclass of int. + if isinstance(obj, (bool, int, float, str, bytes)): return obj if isinstance(obj, Iterable): return [to_dict(_, compact=compact) for _ in obj] diff --git a/src/projspec/webui/ipywidget.py b/src/projspec/webui/ipywidget.py index 07b0892..580c7f4 100644 --- a/src/projspec/webui/ipywidget.py +++ b/src/projspec/webui/ipywidget.py @@ -354,22 +354,48 @@ def _add_confirmed(self, path: str, storage_options: str = "") -> None: self._set_busy(False) def _resolve_entry_path(self, url: str) -> str | None: - """Return the filesystem path for the library entry *url*, if any. - - Always prefers the stored ``Project.path`` (which projspec - normalises to an absolute path via fsspec) to the library key. - The library key is an opaque identity used by the UI; reusing it - as a path breaks for entries keyed on a basename or relative - sub-path (e.g. walked children added under the library root). + """Return the path used to re-open the library entry *url*. + + The path must keep its protocol so remote projects (``memory://``, + ``s3://``, …) re-open against the right filesystem. We prefer the + library key *url* when it already carries a protocol - it is the + authoritative, protocol-qualified identifier the UI holds, and is + reliable even when an older serialised library reconstructed the + entry's filesystem as local. Otherwise we use the stored project's + protocol-qualified URL (``fs.unstrip_protocol(proj.url)``), since + ``proj.path``/``proj.url`` have the protocol stripped by + ``fsspec.url_to_fs`` (e.g. ``/proj`` for ``memory://proj``) and a + bare path would resolve against the *local* filesystem. + + The library key is otherwise an opaque identity used by the UI; + reusing it as a path breaks for entries keyed on a basename or + relative sub-path (e.g. walked children added under the library + root), so we only fall back to ``_url_to_local`` when there is no + matching entry. """ + if url and "://" in url: + return url proj = self._library.entries.get(url) if proj is not None and getattr(proj, "path", None): - return proj.path + try: + return proj.fs.unstrip_protocol(proj.url) + except Exception: + return proj.path # Fall back to the URL, minus any ``file://`` scheme prefix, so # the caller still gets *something* usable when there is no # matching entry (e.g., the UI is about to create one). return _url_to_local(url) if url else None + def _entry_storage_options(self, url: str) -> dict: + """Storage options stored on the library entry *url* (or ``{}``). + + Remote projects (s3://, gcs://, authenticated http, …) need their + ``storage_options`` to be re-supplied when reconstructing the + ``Project`` on rescan, otherwise the filesystem access fails. + """ + proj = self._library.entries.get(url) + return dict(getattr(proj, "storage_options", None) or {}) + def _rescan(self, url: str) -> None: """Re-run ``Project(...)`` for the entry *url* and replace it. @@ -391,7 +417,11 @@ def _rescan(self, url: str) -> None: return self._set_busy(True) try: - proj = projspec.Project(path, walk=False) + proj = projspec.Project( + path, + walk=False, + storage_options=self._entry_storage_options(url), + ) # Keep the *original* library key so we don't duplicate the # entry under a different protocol prefix. self._library.entries[url] = proj @@ -425,9 +455,10 @@ def _create_spec_confirmed(self, url: str, spec: str) -> None: return self._set_busy(True) try: - proj = projspec.Project(path, walk=False) + so = self._entry_storage_options(url) + proj = projspec.Project(path, walk=False, storage_options=so) proj.create(spec) - fresh = projspec.Project(path, walk=False) + fresh = projspec.Project(path, walk=False, storage_options=so) # Same key-preservation rule as _rescan. self._library.entries[url] = fresh if self._library.auto_save: diff --git a/src/projspec/webui/panel.js b/src/projspec/webui/panel.js index b1f3904..418fdaa 100644 --- a/src/projspec/webui/panel.js +++ b/src/projspec/webui/panel.js @@ -204,7 +204,7 @@ if (project.file_count != null && project.total_size != null) metaParts.push(parseInt(project.file_count).toLocaleString() + ' files, ' + fmtSize(project.total_size)); if (project.is_writable != null) - metaParts.push(project.is_writable === 'True' ? 'writable' : 'read-only'); + metaParts.push((project.is_writable === true || project.is_writable === 'True') ? 'writable' : 'read-only'); if (project.last_modified != null) { const age = fmtAge(project.last_modified); const by = project.last_modified_by != null ? project.last_modified_by : null; diff --git a/tests/test_basic.py b/tests/test_basic.py index b9f011d..99f4027 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -61,12 +61,171 @@ def test_serialise(proj): projspec.Project.from_dict(json.loads(js)) +def test_serialise_remote_preserves_filesystem(): + """A remote (non-local) project must round-trip through to_dict/from_dict + with its filesystem intact. + + Regression: ``to_dict`` used to store the protocol-stripped ``self.path`` + (e.g. ``/proj`` for ``memory://proj``); ``from_dict`` then re-ran + ``url_to_fs`` on that bare path and reconstructed a *local* filesystem, so + remote projects were wrongly interpreted as local (and failed to scan / + rescan). + """ + import fsspec + + mfs = fsspec.filesystem("memory") + try: + mfs.rm("/ser_remote", recursive=True) + except FileNotFoundError: + pass + mfs.pipe("/ser_remote/pyproject.toml", b'[project]\nname="x"\nversion="0.1"\n') + try: + p = projspec.Project("memory://ser_remote", walk=False) + assert not p.is_local() + + dic = p.to_dict(compact=False) + # protocol must be preserved in the serialised url + assert dic["url"] == "memory:///ser_remote" + + p2 = projspec.Project.from_dict(json.loads(json.dumps(dic))) + # filesystem reconstructed as memory (not local) + assert not p2.is_local() + assert "python_library" in p2.specs + finally: + mfs.rm("/ser_remote", recursive=True) + + def test_pickleable(proj): pickle.dumps(proj) +def test_from_dict_without_backend_loads_and_displays(monkeypatch): + """A project whose fsspec backend is unavailable must still deserialise, + display, and re-serialise from its cached metadata; only operations that + need the live filesystem (rescan) should fail. + """ + import projspec.proj.base as base + + dic = { + "klass": "project", + "specs": {}, + "children": {}, + "contents": {}, + "artifacts": {}, + "url": "s3://my-bucket/my-proj", + "storage_options": {}, + "file_count": 42, + "total_size": 123456, + "is_writable": False, + "last_modified": None, + "last_modified_by": None, + "scanned_at": 1700000000.0, + } + + real_url_to_fs = base.fsspec.url_to_fs + + def boom(path, **kwargs): + if str(path).startswith("s3://"): + raise ImportError("Install s3fs to access S3") + return real_url_to_fs(path, **kwargs) + + monkeypatch.setattr(base.fsspec, "url_to_fs", boom) + + # loads without raising, with no live filesystem + p = projspec.Project.from_dict(dic) + assert p.fs is None + + # displays from cached metadata + assert p.display_url == "s3://my-bucket/my-proj" + assert not p.is_local() + assert repr(p) == "" + assert p.file_count == 42 + assert p.total_size == 123456 + assert p.is_writable is False + assert "s3://my-bucket/my-proj" in p.text_summary() + str(p) # must not raise + + # re-serialises, keeping the protocol-qualified URL + assert p.to_dict(compact=False)["url"] == "s3://my-bucket/my-proj" + + # but a rescan fails with a clear, actionable error + with pytest.raises(RuntimeError, match="backend is not installed"): + p.resolve() + + def test_get_file(proj): # not in scanning by default bool(proj.get_file("README.md")) # scanned bool(proj.get_file("pyproject.toml")) + + +def test_tree_stats_on_memory_fs(): + """File stats are computed correctly on a real (non-local) filesystem. + + ``memory://`` is an in-process fsspec backend, so this exercises the + remote code path (protocol-prefixed URL, fsspec walk) end-to-end rather + than monkey-patching ``walk``. + """ + import fsspec + + mfs = fsspec.filesystem("memory") + try: + mfs.rm("/treestat", recursive=True) + except FileNotFoundError: + pass + + mfs.pipe("/treestat/a.txt", b"hello") + mfs.pipe("/treestat/b.txt", b"data data") + mfs.pipe("/treestat/sub/c.txt", b"world") + # an excluded directory that must NOT be counted + mfs.pipe("/treestat/node_modules/big.js", b"x" * 9999) + + try: + proj = projspec.Project("memory://treestat", walk=False) + # protocol is stripped from the stored path... + assert "://" not in proj.url + # ...but the (remote) tree was walked and counted correctly, + # with node_modules excluded. + assert proj.file_count == 3 + assert proj.total_size == len("hello") + len("data data") + len("world") + finally: + mfs.rm("/treestat", recursive=True) + + +def test_tree_stats_with_list_yielding_walk(tmp_path): + """Regression: some (often remote) backends ignore ``detail=True`` in + ``walk`` and yield plain name-lists. _tree_stats must still count files + rather than reporting zero. + """ + (tmp_path / "a.txt").write_text("hello") + (tmp_path / "b.txt").write_text("data data") + (tmp_path / "sub").mkdir() + (tmp_path / "sub" / "c.txt").write_text("world") + # an excluded directory that must NOT be counted + (tmp_path / "node_modules").mkdir() + (tmp_path / "node_modules" / "big.js").write_text("x" * 9999) + + proj = projspec.Project(str(tmp_path), walk=False) + assert proj.file_count == 3 + assert proj.total_size == len("hello") + len("data data") + len("world") + + # Now wrap fs.walk so it yields lists of names (detail ignored), mimicking + # backends that caused "0 files" before the fix. + proj2 = projspec.Project(str(tmp_path), walk=False) + # fsspec caches filesystem instances per protocol, so patch on the instance + # and restore afterwards to avoid leaking into other tests. + orig_walk = type(proj2.fs).walk + + def list_walk(self, path, **kwargs): + for dirpath, _dirs, _files in orig_walk(self, path, detail=False, topdown=True): + yield dirpath, _dirs, _files + + proj2.fs.walk = list_walk.__get__(proj2.fs, type(proj2.fs)) + try: + proj2.__dict__.pop("_tree_stats", None) + assert proj2.file_count == 3 + assert proj2.total_size == proj.total_size + finally: + # remove the instance override so the shared (cached) fs is clean + del proj2.fs.walk diff --git a/tests/test_ipywidget_helpers.py b/tests/test_ipywidget_helpers.py index 485a481..ce84131 100644 --- a/tests/test_ipywidget_helpers.py +++ b/tests/test_ipywidget_helpers.py @@ -382,6 +382,49 @@ def test_rescan_preserves_library_key(self, tmp_path, widget_and_lib): _fire(widget, "rescan", url=url) assert url in lib.entries # key must be preserved after rescan + def test_rescan_forwards_storage_options(self, tmp_path, widget_and_lib): + """Rescanning a (remote) entry must re-supply its storage_options to + the reconstructed Project, otherwise remote filesystem access fails.""" + import projspec + + widget, lib, url = widget_and_lib + # pretend the stored entry was opened with storage_options + lib.entries[url].storage_options = {"anon": True, "key": "secret"} + + captured = {} + real_project = projspec.Project + + def fake_project(path, *args, **kwargs): + captured["storage_options"] = kwargs.get("storage_options") + return real_project(path, *args, **kwargs) + + with patch("projspec.Project", side_effect=fake_project): + _fire(widget, "rescan", url=url) + + assert captured["storage_options"] == {"anon": True, "key": "secret"} + + def test_create_spec_confirmed_forwards_storage_options( + self, tmp_path, widget_and_lib + ): + """createSpecConfirmed must also re-supply storage_options.""" + import projspec + + widget, lib, url = widget_and_lib + lib.entries[url].storage_options = {"anon": True} + + seen = [] + real_project = projspec.Project + + def fake_project(path, *args, **kwargs): + seen.append(kwargs.get("storage_options")) + return real_project(path, *args, **kwargs) + + with patch("projspec.Project", side_effect=fake_project): + _fire(widget, "createSpecConfirmed", url=url, spec="github_actions") + + # both the create and the re-read Project constructions get the options + assert seen and all(s == {"anon": True} for s in seen) + def test_set_busy_sends_loading_message(self, widget_and_lib): widget, lib, url = widget_and_lib sends = [] @@ -417,10 +460,113 @@ def test_resolve_entry_path_prefers_project_path(self, widget_and_lib): widget, lib, url = widget_and_lib resolved = widget._resolve_entry_path(url) proj = lib.entries[url] - assert resolved == proj.path + # protocol-qualified form of the project URL, so remote projects + # re-open against the right filesystem on rescan + assert resolved == proj.fs.unstrip_protocol(proj.url) + # for a local project that resolves to a file:// URL of the path + assert resolved == "file://" + proj.path + + def test_resolve_entry_path_remote_keeps_protocol(self, tmp_path): + """A remote (memory://) entry resolves to a protocol-qualified URL, + not the protocol-stripped ``proj.path`` (which would resolve against + the local filesystem and fail).""" + pytest.importorskip("anywidget") + import fsspec + import projspec + + mfs = fsspec.filesystem("memory") + try: + mfs.rm("/ipw_rescan", recursive=True) + except FileNotFoundError: + pass + mfs.pipe("/ipw_rescan/pyproject.toml", b'[project]\nname="x"\nversion="0.1"\n') + try: + lib = ProjectLibrary(None, auto_save=False) + proj = projspec.Project("memory://ipw_rescan", walk=False) + key = proj.fs.unstrip_protocol(proj.url) + lib.entries[key] = proj + + widget = lib.ipywidget() + widget.send = lambda c, buffers=None: None + widget._toast = lambda m: None + + resolved = widget._resolve_entry_path(key) + assert resolved == "memory:///ipw_rescan" + + # a full rescan must succeed and keep the same key + detected specs + _fire(widget, "rescan", url=key) + assert key in lib.entries + assert "python_library" in lib.entries[key].specs + finally: + mfs.rm("/ipw_rescan", recursive=True) + + def test_resolve_entry_path_old_library_keeps_protocol(self, tmp_path): + """Even when an older serialised library reconstructed the entry's + filesystem as *local* (because the stored ``url`` lacked a protocol), + the protocol-qualified library *key* must be used to re-open it.""" + pytest.importorskip("anywidget") + import fsspec + import json + import projspec + from projspec.config import temp_conf + + mfs = fsspec.filesystem("memory") + try: + mfs.rm("/ipw_old", recursive=True) + except FileNotFoundError: + pass + mfs.pipe("/ipw_old/pyproject.toml", b'[project]\nname="x"\nversion="0.1"\n') + + # an old-format entry: protocol-qualified key, but stripped ``url`` + old_entry = { + "klass": "project", + "specs": {}, + "children": {}, + "contents": {}, + "artifacts": {}, + "url": "/ipw_old", + "storage_options": {}, + "file_count": 1, + "total_size": 10, + "is_writable": True, + "last_modified": None, + "last_modified_by": None, + "scanned_at": 1.0, + } + libfile = str(tmp_path / "old_lib.json") + with open(libfile, "w") as f: + json.dump({"memory:///ipw_old": old_entry}, f) + + try: + with temp_conf(auto_rescan=0): + lib = ProjectLibrary(libfile, auto_save=False) + # sanity: the reconstructed entry's fs is (wrongly) local here + assert lib.entries["memory:///ipw_old"].is_local() + + widget = lib.ipywidget() + widget.send = lambda c, buffers=None: None + widget._toast = lambda m: None + + # ...but the key's protocol is honoured regardless + assert ( + widget._resolve_entry_path("memory:///ipw_old") == "memory:///ipw_old" + ) + _fire(widget, "rescan", url="memory:///ipw_old") + entry = lib.entries["memory:///ipw_old"] + assert not entry.is_local() + assert "python_library" in entry.specs + finally: + mfs.rm("/ipw_old", recursive=True) def test_resolve_entry_path_falls_back_to_url_to_local(self, widget_and_lib): widget, lib, url = widget_and_lib - # A key that is not in the library gets _url_to_local applied - result = widget._resolve_entry_path("file:///some/other/path") + # A non-protocol key with no matching entry gets _url_to_local applied + result = widget._resolve_entry_path("/some/other/path") assert result == "/some/other/path" + + def test_resolve_entry_path_file_url_kept(self, widget_and_lib): + widget, lib, url = widget_and_lib + # a file:// key already carries a protocol -> returned as-is (Project + # opens file:// URLs fine) + result = widget._resolve_entry_path("file:///some/other/path") + assert result == "file:///some/other/path" diff --git a/tests/test_utils.py b/tests/test_utils.py index ccdc391..47dff2a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -11,6 +11,8 @@ is_installed, sort_version_strings, run_subprocess, + to_dict, + from_dict, ) @@ -54,6 +56,55 @@ def test_enum(): assert st.to_dict()["klass"] == ["enum", "stack"] +def test_to_dict_preserves_json_native_types(): + # JSON-native scalars must keep their type rather than being stringified, + # so storage_options like {"anon": True} serialise to JSON `true`, not + # the string "True". + import json + + src = { + "anon": True, + "use_ssl": False, + "port": 8080, + "ratio": 0.5, + "token": "abc", + "missing": None, + "nested": {"flag": True, "n": 3}, + "list": [True, 1, "x"], + } + d = to_dict(src) + + assert d["anon"] is True + assert d["use_ssl"] is False + assert d["port"] == 8080 and isinstance(d["port"], int) + assert d["ratio"] == 0.5 + assert d["token"] == "abc" + assert d["missing"] is None + assert d["nested"]["flag"] is True + assert d["list"] == [True, 1, "x"] + + # and it survives a real JSON round-trip as native types + restored = json.loads(json.dumps(d)) + assert restored["anon"] is True + assert restored["nested"]["flag"] is True + # from_dict passes scalars through unchanged + assert from_dict(restored)["anon"] is True + + +def test_storage_options_bool_roundtrip(): + # a Project's storage_options booleans must round-trip as real booleans + import json + + p = projspec.Project( + ".", walk=False, storage_options={"anon": True, "use_listings_cache": False} + ) + js = json.dumps(p.to_dict(compact=False)) + assert '"anon": true' in js + p2 = projspec.Project.from_dict(json.loads(js)) + assert p2.storage_options["anon"] is True + assert p2.storage_options["use_listings_cache"] is False + + def test_sort_versions(): vers = ["1", "1.2.3", "1.0.3", "1.10.3", "1.10.3.dev1", "1.10.3.dev"] expected = ["1", "1.0.3", "1.2.3", "1.10.3", "1.10.3.dev", "1.10.3.dev1"] diff --git a/vsextension/src/panel.ts b/vsextension/src/panel.ts index 7149c9c..8b4d761 100644 --- a/vsextension/src/panel.ts +++ b/vsextension/src/panel.ts @@ -263,9 +263,22 @@ export class ProjspecPanel { } } + /** + * The storage_options of the library entry for *url*, as a JSON string + * (or undefined if none). Remote projects need these re-supplied when the + * Project is reconstructed on rescan, otherwise filesystem access fails. + */ + private entryStorageOptions(url: string): string | undefined { + const so = this.library[url]?.storage_options; + if (so && typeof so === 'object' && Object.keys(so).length > 0) { + return JSON.stringify(so); + } + return undefined; + } + private async rescan(url: string): Promise { await this.withBusy(async () => { - const res = await scan(url, true); + const res = await scan(url, true, this.entryStorageOptions(url)); if (res.code !== 0) { vscode.window.showWarningMessage(`projspec scan: ${res.stderr.trim() || 'failed'}`); } @@ -305,7 +318,7 @@ export class ProjspecPanel { vscode.window.showWarningMessage(`projspec create: ${res.stderr.trim() || 'failed'}`); } // Rescan the specific project, then refresh library. - await scan(p, true); + await scan(p, true, this.entryStorageOptions(url)); await this.reload(false); }); } @@ -1022,7 +1035,7 @@ const PANEL_JS = String.raw` if (project.file_count != null && project.total_size != null) metaParts.push(parseInt(project.file_count).toLocaleString() + ' files, ' + fmtSize(project.total_size)); if (project.is_writable != null) - metaParts.push(project.is_writable === 'True' ? 'writable' : 'read-only'); + metaParts.push((project.is_writable === true || project.is_writable === 'True') ? 'writable' : 'read-only'); if (project.last_modified != null) { const age = fmtAge(project.last_modified); const by = project.last_modified_by != null ? project.last_modified_by : null; diff --git a/vsextension/src/projspec.ts b/vsextension/src/projspec.ts index 97ed9a8..57a1c0a 100644 --- a/vsextension/src/projspec.ts +++ b/vsextension/src/projspec.ts @@ -166,12 +166,12 @@ export interface ProjectData { artifacts: Record; children?: Record; klass?: [string, string]; - file_count?: string; - total_size?: string; - is_writable?: string; - last_modified?: string; + file_count?: number | string; + total_size?: number | string; + is_writable?: boolean | string; + last_modified?: number | string; last_modified_by?: string; - scanned_at?: string; + scanned_at?: number | string; } export interface SpecData {