2024-02-05 15:29:45 +00:00
package libraries
import (
"net/url"
"path"
2024-08-21 08:22:35 +00:00
"regexp"
2024-02-05 15:29:45 +00:00
"strings"
)
// IsLocalPath reports whether p should be interpreted as a path on the
// local file system.
//
// Examples of paths treated as local:
//
//   - myfile.txt
//   - ./myfile.txt
//   - ../myfile.txt
//   - file:///foo/bar/myfile.txt
//
// Examples of paths treated as remote:
//
//   - dbfs:/mnt/myfile.txt
//   - s3:/mybucket/myfile.txt
//   - /Users/jane@doe.com/myfile.txt
func IsLocalPath(p string) bool {
	switch {
	case strings.HasPrefix(p, "file://"):
		// An explicit file scheme always denotes a local path.
		return true
	case isRemoteStorageScheme(p):
		// Any other scheme denotes a remote location.
		return false
	default:
		// Without a scheme, absolute paths (leading "/") are remote and
		// everything else is relative to the local file system.
		return !path.IsAbs(p)
	}
}
2024-08-14 09:03:44 +00:00
// IsLibraryLocal returns true if the specified library or environment dependency
2024-04-22 11:44:34 +00:00
// should be interpreted as a local path.
2024-08-14 09:03:44 +00:00
// We use this to check if the dependency in environment spec is local or that library is local.
2024-04-22 11:44:34 +00:00
// We can't use IsLocalPath beacuse environment dependencies can be
// a pypi package name which can be misinterpreted as a local path by IsLocalPath.
2024-08-14 09:03:44 +00:00
func IsLibraryLocal ( dep string ) bool {
2024-08-26 10:03:56 +00:00
if dep == "" {
return false
}
2024-04-22 11:44:34 +00:00
possiblePrefixes := [ ] string {
"." ,
}
for _ , prefix := range possiblePrefixes {
if strings . HasPrefix ( dep , prefix ) {
return true
}
}
2024-10-21 11:45:39 +00:00
// If the dependency starts with --, it's a pip flag option which is a valid
// entry for environment dependencies but not a local path
if containsPipFlag ( dep ) {
return false
}
2024-08-14 09:03:44 +00:00
// If the dependency is a requirements file, it's not a valid local path
if strings . HasPrefix ( dep , "-r" ) {
return false
}
// If the dependency has no extension, it's a PyPi package name
if isPackage ( dep ) {
return false
}
return IsLocalPath ( dep )
}
2024-10-21 11:45:39 +00:00
// pipFlagRegex matches a long pip command-line option such as
// "--extra-index-url". The option must be at the start of the input or be
// preceded by whitespace, so a double dash inside a file name
// (e.g. "dist/my--lib.whl") is not mistaken for an option.
var pipFlagRegex = regexp.MustCompile(`(?:^|\s)--[a-zA-Z0-9-]+`)

// containsPipFlag reports whether input contains a long pip option
// ("--name"), either as the first token or after whitespace.
//
// The pattern is compiled once at package scope rather than on every call.
func containsPipFlag(input string) bool {
	return pipFlagRegex.MatchString(input)
}
2024-08-21 08:22:35 +00:00
// packageRegex matches a pip requirement made of a package name, optional
// extras and zero or more version specifiers. The pattern is built from:
//
//   - ^[a-zA-Z0-9\-_]+ matches the package name, allowing alphanumeric
//     characters, dashes (-), and underscores (_).
//   - (\[.*\])? optionally matches any extras specified in square brackets,
//     e.g. [security].
//   - ((==|!=|<=|>=|~=|>|<)\s?\d+(\.\d+){0,2}(\.\*)?,?)* matches version
//     specifiers: an operator (==, !=, etc.) followed by a version number
//     with up to three numeric components (e.g. 2.25.1) and an optional
//     trailing ".*". Each specifier may end with a comma (,), which is used
//     to separate multiple specifiers. There can be several specifiers or
//     none at all.
//
// A single optional whitespace character is accepted after the name, after
// the extras, and between an operator and its version number.
//
// Spec for package name and version specifier:
// https://pip.pypa.io/en/stable/reference/requirement-specifiers/
var packageRegex = regexp.MustCompile(`^[a-zA-Z0-9\-_]+\s?(\[.*\])?\s?((==|!=|<=|>=|~=|>|<)\s?\d+(\.\d+){0,2}(\.\*)?,?)*$`)
2024-08-21 08:22:35 +00:00
2024-08-14 09:03:44 +00:00
func isPackage ( name string ) bool {
2024-08-21 08:22:35 +00:00
if packageRegex . MatchString ( name ) {
2024-08-20 09:33:03 +00:00
return true
}
2024-08-21 08:22:35 +00:00
return isUrlBasedLookup ( name )
}
func isUrlBasedLookup ( name string ) bool {
parts := strings . Split ( name , " @ " )
if len ( parts ) != 2 {
return false
}
return packageRegex . MatchString ( parts [ 0 ] ) && isRemoteStorageScheme ( parts [ 1 ] )
2024-04-22 11:44:34 +00:00
}
2024-02-05 15:29:45 +00:00
// isRemoteStorageScheme reports whether path is written as "<scheme>:/..."
// with a scheme other than "file".
//
// Inputs that cannot be parsed as a URL, or that parse without a scheme,
// are not considered remote.
func isRemoteStorageScheme(path string) bool {
	parsed, err := url.Parse(path)
	if err != nil || parsed.Scheme == "" {
		return false
	}
	if parsed.Scheme == "file" {
		return false
	}

	// Only the "scheme:/" form counts as a remote storage location; a bare
	// "scheme:rest" does not.
	// NOTE: url.Parse lower-cases the scheme, so this prefix comparison only
	// succeeds when the scheme in path is itself written in lower case.
	return strings.HasPrefix(path, parsed.Scheme+":/")
}