Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ body { margin: 0; padding: 0; font-family: var(--vscode-font-family); color: var
if (project.file_count != null && project.total_size != null)
metaParts.push(parseInt(project.file_count).toLocaleString() + ' files, ' + fmtSize(project.total_size));
if (project.is_writable != null)
metaParts.push(project.is_writable === 'True' ? 'writable' : 'read-only');
metaParts.push((project.is_writable === true || project.is_writable === 'True') ? 'writable' : 'read-only');
if (project.last_modified != null) {
const age = fmtAge(project.last_modified);
const by = project.last_modified_by != null ? project.last_modified_by : null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -433,14 +433,31 @@ class ProjspecToolWindowPanel(
private fun rescan(url: String) {
withBusy {
val path = OpenWithHelper.urlToPath(url)
val res = ProjspecRunner.runScan(path)
val res = ProjspecRunner.runScan(path, entryStorageOptions(url))
if (res is CliResult.Failure) {
Notifier.warning("projspec scan: ${res.message}", project)
}
reload(initial = false)
}
}

/**
 * Looks up the library entry stored under [url] and returns its
 * `storage_options` map re-encoded as a JSON string.
 *
 * Returns `null` when the entry is missing or is not a map, when it has no
 * `storage_options` map, when that map is empty, or when anything throws
 * while reading or serialising it. Callers hand the result to the scan
 * command so the options recorded for the entry are supplied again on rescan.
 */
@Suppress("UNCHECKED_CAST")
private fun entryStorageOptions(url: String): String? =
    try {
        val entry = libraryMap[url] as? Map<String, Any?>
        val options = entry?.get("storage_options") as? Map<String, Any?>
        // An empty map is treated the same as "no options at all".
        options?.takeIf { it.isNotEmpty() }?.let { gson.toJson(it) }
    } catch (_: Exception) {
        // Best effort: a malformed entry must not break the rescan itself.
        null
    }

/**
* Show the create-spec modal — but first filter the known spec list by
* what is *not* already present in the selected project. Mirrors the
Expand Down Expand Up @@ -472,7 +489,7 @@ class ProjspecToolWindowPanel(
if (createRes is CliResult.Failure) {
Notifier.warning("projspec create: ${createRes.message}", project)
}
ProjspecRunner.runScan(path)
ProjspecRunner.runScan(path, entryStorageOptions(url))
reload(initial = false)
}
}
Expand Down
18 changes: 16 additions & 2 deletions pycharm_plugin/src/main/kotlin/com/projspec/util/ProjspecRunner.kt
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,22 @@ object ProjspecRunner {
/** `projspec library list --json-out` — returns project library JSON. */
fun runLibraryList(): CliResult = run(listOf(cli, "library", "list", "--json-out"))

/** `projspec scan --library <path>` — scan & register a directory. */
fun runScan(path: String): CliResult = run(listOf(cli, "scan", "--library", path))
/**
 * Runs `projspec scan --library [--storage_options <json>] <path>`.
 *
 * @param path the location to scan; always passed as the final argument.
 * @param storageOptions optional JSON string. When it is `null` or blank the
 *   `--storage_options` flag is omitted entirely; otherwise the flag and the
 *   string are inserted ahead of [path].
 * @return whatever [run] produces for the assembled command line.
 */
fun runScan(path: String, storageOptions: String? = null): CliResult {
    val optionArgs: List<String> =
        if (storageOptions.isNullOrBlank()) {
            emptyList()
        } else {
            listOf("--storage_options", storageOptions)
        }
    // Order matters: base command, optional flag pair, then the path.
    return run(listOf(cli, "scan", "--library") + optionArgs + path)
}

/** `projspec create <spec> <path>` — create a new spec inside a project. */
fun runCreate(spec: String, path: String): CliResult =
Expand Down
99 changes: 76 additions & 23 deletions src/projspec/proj/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,21 @@ def is_local(self) -> bool:
# see also fsspec.utils.can_be_local for more flexibility with caching.
return isinstance(self.fs, fsspec.implementations.local.LocalFileSystem)

@property
def display_url(self) -> str:
    """Return a URL for this project that is suitable for showing to users.

    When a filesystem object is attached, the result is whatever its
    ``unstrip_protocol`` gives for ``self.url``. If no filesystem is attached
    (``fs`` is ``None``) or that call raises, the stored ``url`` is returned
    instead, or ``path`` when ``url`` is empty.
    """
    backend = self.fs
    if backend is None:
        return self.url or self.path
    try:
        qualified = backend.unstrip_protocol(self.url)
    except Exception:
        # Display must never fail; fall back to the stored values.
        return self.url or self.path
    return qualified

@cached_property
def _tree_stats(self) -> dict:
"""Walk the directory tree and collect aggregate statistics.
Expand All @@ -155,31 +170,49 @@ def _tree_stats(self) -> dict:
best_mtime: float | None = None
best_info: dict | None = None

def _excluded(name: str) -> bool:
return name in self.excludes or name.startswith((".", "_"))

try:
for dirpath, subdirs, files in self.fs.walk(
self.url, topdown=True, detail=True
):
# Prune excluded directories in-place (topdown=True makes this work)
# subdirs is a dict when detail=True
# Prune excluded directories in-place so walk() does not recurse
# into them (topdown=True makes in-place mutation effective).
# ``subdirs`` is a dict when the backend honours detail=True, but
# some backends ignore the flag and yield a plain list of names;
# handle both.
if isinstance(subdirs, dict):
to_remove = [
name
for name in list(subdirs)
if name in self.excludes or name.startswith((".", "_"))
]
for name in to_remove:
for name in [n for n in list(subdirs) if _excluded(n)]:
del subdirs[name]
file_infos = files.values() if isinstance(files, dict) else []
elif isinstance(subdirs, list):
subdirs[:] = [n for n in subdirs if not _excluded(n)]

# ``files`` is a {name: info} dict when detail is honoured, or a
# list of names otherwise. Normalise to an iterable of
# (name, info-or-None) so file counting works for both - the
# previous code silently counted zero files for list-yielding
# backends (a common cause of "remote project shows 0 files").
if isinstance(files, dict):
file_items = list(files.items())
elif isinstance(files, list):
file_items = [(name, None) for name in files]
else:
# Some backends yield lists even with detail=True; skip pruning
file_infos = []
file_items = []

for finfo in file_infos:
for name, finfo in file_items:
# Resolve a detail dict: provided directly, or fetched
# lazily via info() when the backend only gave us a name.
if not isinstance(finfo, dict):
full = f"{dirpath.rstrip('/')}/{name}" if name else dirpath
try:
finfo = self.fs.info(full)
except Exception:
finfo = {}
if finfo.get("type") == "directory":
continue
size = finfo.get("size") or 0
file_count += 1
total_size += size
total_size += finfo.get("size") or 0
mtime = finfo.get("mtime") or finfo.get("LastModified")
if mtime is not None:
# mtime may be a datetime; normalise to float
Expand Down Expand Up @@ -347,6 +380,13 @@ def resolve(
types = set(camel_to_snake(_) for _ in types or ())
if types and types - set(registry):
raise ValueError(f"Unknown types: {set(types) - set(registry)}")
if self.fs is None:
# This project was deserialised without its fsspec backend
# available; we cannot read the filesystem to (re)scan it.
raise RuntimeError(
f"Cannot scan {self.path!r}: the required fsspec backend is not "
"installed in this environment. Install it and try again."
)
# record when this (re)scan happened
self.scanned_at = time.time()
# sorting to ensure consistency
Expand Down Expand Up @@ -412,11 +452,7 @@ def basenames(self):

def text_summary(self, bare=False) -> str:
"""Only shows project types, not what they contain"""
txt = (
self.fs.unstrip_protocol(self.url)
if bare
else f"<Project '{self.fs.unstrip_protocol(self.url)}'>\n"
)
txt = self.display_url if bare else f"<Project '{self.display_url}'>\n"
if not bare:
txt += self._stats_line() + "\n"
bits = [
Expand Down Expand Up @@ -459,11 +495,11 @@ def _stats_line(self) -> str:
return " " + " · ".join(parts) if parts else ""

def __repr__(self):
return f"<Project '{self.fs.unstrip_protocol(self.url)}'>"
return f"<Project '{self.display_url}'>"

def __str__(self):
txt = "<Project '{}'>\n{}\n\n{}".format(
self.fs.unstrip_protocol(self.url),
self.display_url,
self._stats_line(),
"\n\n".join(str(_) for _ in self.specs.values()),
)
Expand Down Expand Up @@ -571,10 +607,17 @@ def __contains__(self, item) -> bool:
return item in self.specs or any(item in _ for _ in self.children.values())

def to_dict(self, compact=True) -> dict:
# Store the *protocol-qualified* URL (e.g. ``s3://bucket/key``,
# ``memory:///proj``) rather than the protocol-stripped ``self.path``.
# Otherwise deserialisation re-runs ``url_to_fs`` on a bare path and
# wrongly reconstructs a LocalFileSystem, so remote projects get
# interpreted as local (failing to scan / rescan). ``display_url``
# also works when the fsspec backend is unavailable (``fs is None``).
url = self.display_url
dic = AttrDict(
specs=self.specs,
children=self.children,
url=self.path,
url=url,
storage_options=self.storage_options,
artifacts=self.artifacts,
contents=self.contents,
Expand Down Expand Up @@ -614,7 +657,17 @@ def from_dict(dic):
proj.artifacts = from_dict(dic["artifacts"], proj)
proj.path = dic["url"]
proj.storage_options = dic["storage_options"]
proj.fs, proj.url = fsspec.url_to_fs(proj.path, **proj.storage_options)
try:
proj.fs, proj.url = fsspec.url_to_fs(proj.path, **proj.storage_options)
except Exception:
# The fsspec backend for this URL may not be installed in the
# current environment (e.g. a library entry for ``s3://...`` loaded
# without ``s3fs``). The project must still be loadable and
# displayable from its cached metadata; only operations that need
# the live filesystem (rescan, file access) should fail. Leave
# ``fs`` unset and keep the (protocol-qualified) URL as-is.
proj.fs = None
proj.url = proj.path
scanned_at = dic.get("scanned_at")
try:
proj.scanned_at = float(scanned_at)
Expand Down
65 changes: 61 additions & 4 deletions src/projspec/qtapp/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def _action_open_with(self, tool: str, url: str) -> None:
_spawn_detached(["jupyter", "lab", local])

def _action_rescan(self, url: str) -> None:
self._scan_and_reload(url, walk=False)
self._rescan(url)

def _action_create_spec(self, url: str) -> None:
proj = library.entries.get(url)
Expand Down Expand Up @@ -326,10 +326,12 @@ def _action_create_spec_confirmed(self, url: str, spec: str) -> None:
self._set_busy(True)
try:
path = _url_to_local(url)
proj = projspec.Project(path, walk=False)
existing = library.entries.get(url)
storage_options = dict(getattr(existing, "storage_options", None) or {})
proj = projspec.Project(path, walk=False, storage_options=storage_options)
proj.create(spec)
# Rescan and refresh.
fresh = projspec.Project(path, walk=False)
fresh = projspec.Project(path, walk=False, storage_options=storage_options)
library.add_entry(path, fresh)
self._reload()
except Exception as e:
Expand Down Expand Up @@ -396,11 +398,47 @@ def _action_reveal_file(self, fn: str) -> None:

# ── Scan helper ─────────────────────────────────────────────────────────

def _rescan(self, url: str) -> None:
    """Rebuild the library entry stored under *url* and replace it in place.

    An empty *url* is a no-op. Otherwise the busy indicator is switched on,
    the current entry (if any) is looked up, and a new ``Project`` is built
    with ``walk=False`` using a copy of that entry's ``storage_options`` and
    the path chosen by ``_rescan_path``. The new project is stored under the
    same key *url*, so the key does not change, and the view is reloaded.

    Any exception is reported through a warning dialog rather than
    propagated, and the busy indicator is always switched off afterwards.
    """
    if not url:
        return
    self._set_busy(True)
    try:
        entry = library.entries.get(url)
        # Copy so the new Project never shares the old entry's dict.
        options = dict(getattr(entry, "storage_options", None) or {})
        refreshed = projspec.Project(
            _rescan_path(url, entry), walk=False, storage_options=options
        )
        # Re-register under the original key to avoid a duplicate entry.
        library.add_entry(url, refreshed)
        self._reload()
    except Exception as exc:
        QMessageBox.warning(self, "Rescan", f"Rescan failed: {exc}")
    finally:
        self._set_busy(False)

def _scan_and_reload(self, url: str, walk: bool) -> None:
self._set_busy(True)
try:
path = _url_to_local(url) if url.startswith("file://") else url
proj = projspec.Project(path, walk=walk)
# Re-supply storage_options from the existing library entry (if
# any) so rescanning a remote project keeps working.
existing = library.entries.get(url)
storage_options = dict(getattr(existing, "storage_options", None) or {})
proj = projspec.Project(path, walk=walk, storage_options=storage_options)
if walk:
for child_url, child in (proj.children or {}).items():
if child.specs:
Expand All @@ -426,6 +464,25 @@ def _url_to_local(url: str) -> str:
return url


def _rescan_path(url: str, existing) -> str:
"""Resolve the path to re-open *url* as a Project, preserving protocol.

Prefers the library key *url* when it already carries a protocol (it is the
authoritative, protocol-qualified identifier and is reliable even if an
older serialised library reconstructed the entry's filesystem as local).
Otherwise falls back to the stored project's protocol-qualified URL, and
finally to the key itself.
"""
if "://" in url:
return url
if existing is not None:
try:
return existing.fs.unstrip_protocol(existing.url)
except Exception:
return getattr(existing, "path", url) or url
return url


def _spawn_detached(cmd: list[str]) -> None:
"""Launch an external tool without blocking the Qt event loop."""
try:
Expand Down
25 changes: 21 additions & 4 deletions src/projspec/textapp/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -914,6 +914,19 @@ def _url_to_local(url: str) -> str:
return url[len("file://") :] if url.startswith("file://") else url


def _entry_storage_options(url: str) -> str:
    """Return the ``storage_options`` of library entry *url* as JSON text.

    The result is ``""`` when the entry is missing, has no
    ``storage_options`` attribute, or that attribute is empty/falsy;
    otherwise it is ``json.dumps`` of the options. The string is meant to be
    passed along when the project is re-created on rescan.
    """
    import json as _json

    entry = library.entries.get(url)
    options = getattr(entry, "storage_options", None)
    if not options:
        return ""
    return _json.dumps(options)


def _spawn_detached(cmd: list[str]) -> None:
try:
subprocess.Popen(
Expand Down Expand Up @@ -1349,7 +1362,9 @@ def _cb(key: str | None) -> None:
elif key == "openJupyter":
_spawn_detached(["jupyter", "lab", _url_to_local(url)])
elif key == "rescan":
self._scan_and_reload(url, walk=False)
self._scan_and_reload(
url, walk=False, storage_options=_entry_storage_options(url)
)
elif key == "createSpec":
self._open_create_spec(url)
elif key == "remove":
Expand Down Expand Up @@ -1378,9 +1393,11 @@ def _cb(pick: str | None) -> None:
self._set_busy(True)
try:
path = _url_to_local(url)
proj = projspec.Project(path, walk=False)
proj.create(pick)
fresh = projspec.Project(path, walk=False)
proj = library.entries.get(url)
so = getattr(proj, "storage_options", None) or {}
new = projspec.Project(path, walk=False, storage_options=so)
new.create(pick)
fresh = projspec.Project(path, walk=False, storage_options=so)
library.add_entry(path, fresh)
self.status_message = f"Created {pick} in {path}"
self._reload()
Expand Down
5 changes: 4 additions & 1 deletion src/projspec/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,10 @@ def to_dict(obj, compact=True):
)
for k, v in obj.items()
}
if isinstance(obj, (bytes, str)):
# Preserve JSON-native scalar types as-is so they round-trip with the
# correct type (e.g. True -> true, not "True"). Note bool must be checked
# before/alongside int since bool is a subclass of int.
if isinstance(obj, (bool, int, float, str, bytes)):
return obj
if isinstance(obj, Iterable):
return [to_dict(_, compact=compact) for _ in obj]
Expand Down
Loading
Loading