diff --git a/cmd/datasetIngestor/main.go b/cmd/datasetIngestor/main.go index 07f684b..ae5af47 100644 --- a/cmd/datasetIngestor/main.go +++ b/cmd/datasetIngestor/main.go @@ -203,7 +203,10 @@ func main() { /* TODO Add info about policy settings and that autoarchive will take place or not */ - metaDataMap, sourceFolder, beamlineAccount := datasetIngestor.CheckMetadata(client, APIServer, metadatafile, user, accessGroups) + metaDataMap, sourceFolder, beamlineAccount, err := datasetIngestor.CheckMetadata(client, APIServer, metadatafile, user, accessGroups) + if err != nil { + log.Fatal("Error in CheckMetadata function: ", err) + } //log.Printf("metadata object: %v\n", metaDataMap) // assemble list of folders (=datasets) to created diff --git a/datasetIngestor/checkMetadata.go b/datasetIngestor/checkMetadata.go index 9b297df..c44b261 100644 --- a/datasetIngestor/checkMetadata.go +++ b/datasetIngestor/checkMetadata.go @@ -11,55 +11,58 @@ import ( "os" "path/filepath" "strings" - + "errors" "github.com/fatih/color" + "fmt" ) -const DUMMY_TIME = "2300-01-01T11:11:11.000Z" -const DUMMY_OWNER = "x12345" +const ( + ErrIllegalKeys = "metadata contains keys with illegal characters (., [], $, or <>)" + DUMMY_TIME = "2300-01-01T11:11:11.000Z" + DUMMY_OWNER = "x12345" +) -// getHost is a function that attempts to retrieve and return the fully qualified domain name (FQDN) of the current host. -// If it encounters any error during the process, it gracefully falls back to returning the simple hostname or "unknown". -func getHost() string { - // Try to get the hostname of the current machine. - hostname, err := os.Hostname() + +func CheckMetadata(client *http.Client, APIServer string, metadatafile string, user map[string]string, accessGroups []string) (metaDataMap map[string]interface{}, sourceFolder string, beamlineAccount bool, err error) { + metaDataMap, err = readMetadataFromFile(metadatafile) if err != nil { - return "unknown" + return nil, "", false, err } - addrs, err := net.LookupIP(hostname) + if containsIllegalKeys(metaDataMap) { + return nil, "", false, errors.New(ErrIllegalKeys) + } + + beamlineAccount, err = checkUserAndOwnerGroup(user, accessGroups, metaDataMap) if err != nil { - return hostname + return nil, "", false, err } - for _, addr := range addrs { - if ipv4 := addr.To4(); ipv4 != nil { - ip, err := ipv4.MarshalText() - if err != nil { - return hostname - } - hosts, err := net.LookupAddr(string(ip)) - if err != nil || len(hosts) == 0 { - return hostname - } - fqdn := hosts[0] - return strings.TrimSuffix(fqdn, ".") // return fqdn without trailing dot - } + err = augmentMissingMetadata(user, metaDataMap, client, APIServer, accessGroups) + if err != nil { + return nil, "", false, err } - return hostname + + err = checkMetadataValidity(client, APIServer, metaDataMap, metaDataMap["type"].(string)) + if err != nil { + return nil, "", false, err + } + + sourceFolder, err = getSourceFolder(metaDataMap) + if err != nil { + return nil, "", false, err + } + + return metaDataMap, sourceFolder, beamlineAccount, nil } -// CheckMetadata is a function that validates and augments metadata for a dataset. -// It takes an HTTP client, an API server URL, a metadata file path, a user map, and a list of access groups as input. -// It returns a map of metadata, a source folder string, and a boolean indicating whether the dataset belongs to a beamline account. -func CheckMetadata(client *http.Client, APIServer string, metadatafile string, user map[string]string, accessGroups []string) (metaDataMap map[string]interface{}, sourceFolder string, beamlineAccount bool) { - // Read the full metadata from the file. +// readMetadataFromFile reads the metadata from the file and unmarshals it into a map. +func readMetadataFromFile(metadatafile string) (map[string]interface{}, error) { b, err := ioutil.ReadFile(metadatafile) // just pass the file name if err != nil { log.Fatal(err) } - // Unmarshal the JSON metadata into an interface{} object. var metadataObj interface{} // Using interface{} allows metadataObj to hold any type of data, since it has no defined methods. err = json.Unmarshal(b, &metadataObj) @@ -68,15 +71,52 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u } // Use type assertion to convert the interface{} object to a map[string]interface{}. - metaDataMap = metadataObj.(map[string]interface{}) // `.(` is type assertion: a way to extract the underlying value of an interface and check whether it's of a specific type. - beamlineAccount = false + metaDataMap := metadataObj.(map[string]interface{}) // `.(` is type assertion: a way to extract the underlying value of an interface and check whether it's of a specific type. + return metaDataMap, err +} + +func containsIllegalKeys(metadata map[string]interface{}) bool { + for key, value := range metadata { + if containsIllegalCharacters(key) { + return true + } - // Check scientificMetadata for illegal keys - if checkIllegalKeys(metaDataMap) { - panic("Metadata contains keys with illegal characters (., [], $, or <>).") + switch v := value.(type) { + case map[string]interface{}: + if containsIllegalKeys(v) { + return true + } + case []interface{}: + for _, item := range v { + switch itemValue := item.(type) { // Type switch on array item + case map[string]interface{}: + if containsIllegalKeys(itemValue) { + return true + } + // Add other cases if needed + } + } + } + } + return false +} + +func containsIllegalCharacters(s string) bool { + // Check if the string contains periods, brackets, or other illegal characters + // You can adjust this condition based on your specific requirements + for _, char := range s { + if char == '.' || char == '[' || char == ']' || char == '<' || char == '>' || char == '$' { + return true + } } + return false +} - // If the user is not the ingestor, check whether any of the accessGroups equal the ownerGroup. Otherwise, check for beamline-specific accounts. +// checkUserAndOwnerGroup checks the user and owner group and returns whether the user is a beamline account. +// checkUserAndOwnerGroup checks the user and owner group and returns whether the user is a beamline account. +func checkUserAndOwnerGroup(user map[string]string, accessGroups []string, metaDataMap map[string]interface{}) (bool, error) { + beamlineAccount := false + if user["displayName"] != "ingestor" { // Check if the metadata contains the "ownerGroup" key. if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { // type assertion with a comma-ok idiom @@ -88,8 +128,8 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u break } } - if validOwner { - log.Printf("OwnerGroup information %s verified successfully.\n", ownerGroup) + if validOwner { + log.Printf("OwnerGroup information %s verified successfully.\n", ownerGroup) } else { // If the owner group is not valid, check for beamline-specific accounts. if creationLocation, ok := metaDataMap["creationLocation"]; ok { @@ -103,27 +143,62 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u if user["displayName"] == expectedAccount { log.Printf("Beamline specific dataset %s - ingest granted.\n", expectedAccount) beamlineAccount = true - } else { - log.Fatalf("You are neither member of the ownerGroup %s nor the needed beamline account %s", ownerGroup, expectedAccount) - } - } else { - // for other data just check user name - // this is a quick and dirty test. Should be replaced by test for "globalaccess" role. TODO - // facilities: ["SLS", "SINQ", "SWISSFEL", "SmuS"], - u := user["displayName"] - if strings.HasPrefix(u, "sls") || - strings.HasPrefix(u, "swissfel") || - strings.HasPrefix(u, "sinq") || - strings.HasPrefix(u, "smus") { - beamlineAccount = true + } else { + return false, fmt.Errorf("you are neither member of the ownerGroup %s nor the needed beamline account %s", ownerGroup, expectedAccount) + } + } else { + // for other data just check user name + // this is a quick and dirty test. Should be replaced by test for "globalaccess" role. TODO + // facilities: ["SLS", "SINQ", "SWISSFEL", "SmuS"], + u := user["displayName"] + if strings.HasPrefix(u, "sls") || + strings.HasPrefix(u, "swissfel") || + strings.HasPrefix(u, "sinq") || + strings.HasPrefix(u, "smus") { + beamlineAccount = true + } + } } } } - } + + return beamlineAccount, nil +} + +// getHost is a function that attempts to retrieve and return the fully qualified domain name (FQDN) of the current host. +// If it encounters any error during the process, it gracefully falls back to returning the simple hostname or "unknown". +func getHost() string { + // Try to get the hostname of the current machine. + hostname, err := os.Hostname() + if err != nil { + return "unknown" + } + + addrs, err := net.LookupIP(hostname) + if err != nil { + return hostname } - // Check if ownerGroup is in accessGroups list + for _, addr := range addrs { + if ipv4 := addr.To4(); ipv4 != nil { + ip, err := ipv4.MarshalText() + if err != nil { + return hostname + } + hosts, err := net.LookupAddr(string(ip)) + if err != nil || len(hosts) == 0 { + return hostname + } + fqdn := hosts[0] + return strings.TrimSuffix(fqdn, ".") // return fqdn without trailing dot + } + } + return hostname +} +// augmentMissingMetadata augments missing metadata fields. +func augmentMissingMetadata(user map[string]string, metaDataMap map[string]interface{}, client *http.Client, APIServer string, accessGroups []string) error { + color.Set(color.FgGreen) // optionally augment missing owner metadata if _, ok := metaDataMap["owner"]; !ok { @@ -150,169 +225,141 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u } // for raw data add PI if missing if val, ok := metaDataMap["type"]; ok { - dstype := val.(string) + dstype, ok := val.(string) + if !ok { + return fmt.Errorf("type is not a string") + } if dstype == "raw" { if _, ok := metaDataMap["principalInvestigator"]; !ok { val, ok := metaDataMap["ownerGroup"] if ok { - ownerGroup := val.(string) + ownerGroup, ok := val.(string) + if !ok { + return fmt.Errorf("ownerGroup is not a string") + } proposal := datasetUtils.GetProposal(client, APIServer, ownerGroup, user, accessGroups) if val, ok := proposal["pi_email"]; ok { - metaDataMap["principalInvestigator"] = val.(string) + piEmail, ok := val.(string) + if !ok { + return fmt.Errorf("pi_email is not a string") + } + metaDataMap["principalInvestigator"] = piEmail log.Printf("principalInvestigator field added: %s", metaDataMap["principalInvestigator"]) - } else { - color.Set(color.FgRed) - log.Printf("principalInvestigator field missing for raw data and could not be added from proposal data.") - log.Printf("Please add the field explicitly to metadata file") - color.Unset() + } else { + log.Printf("principalInvestigator field missing for raw data and could not be added from proposal data.") + log.Printf("Please add the field explicitly to metadata file") + } } } } } - } - - color.Unset() - var bmm []byte - if val, ok := metaDataMap["type"]; ok { - dstype := val.(string) - // fmt.Println(errm,sourceFolder) + return nil +} - // verify data structure of meta data by calling isValid API for Dataset +// checkMetadataValidity checks the validity of the metadata by calling the appropriate API. +func checkMetadataValidity(client *http.Client, APIServer string, metaDataMap map[string]interface{}, dstype string) error { + myurl := "" + switch dstype { + case "raw": + myurl = APIServer + "/RawDatasets/isValid" + case "derived": + myurl = APIServer + "/DerivedDatasets/isValid" + case "base": + myurl = APIServer + "/Datasets/isValid" + default: + return fmt.Errorf("unknown dataset type encountered: %s", dstype) + } - myurl := "" - if dstype == "raw" { - myurl = APIServer + "/RawDatasets/isValid" - } else if dstype == "derived" { - myurl = APIServer + "/DerivedDatasets/isValid" - } else if dstype == "base" { - myurl = APIServer + "/Datasets/isValid" - } else { - log.Fatal("Unknown dataset type encountered:", dstype) - } + // add dummy data for fields which can only be filled after file scan to pass the validity test - // add dummy data for fields which can only be filled after file scan to pass the validity test - - if _, exists := metaDataMap["ownerGroup"]; !exists { - metaDataMap["ownerGroup"] = DUMMY_OWNER - } - if _, exists := metaDataMap["creationTime"]; !exists { - metaDataMap["creationTime"] = DUMMY_TIME - } - if metaDataMap["type"] == "raw" { - if _, exists := metaDataMap["endTime"]; !exists { - metaDataMap["endTime"] = DUMMY_TIME - } + if _, exists := metaDataMap["ownerGroup"]; !exists { + metaDataMap["ownerGroup"] = DUMMY_OWNER + } + if _, exists := metaDataMap["creationTime"]; !exists { + metaDataMap["creationTime"] = DUMMY_TIME + } + if metaDataMap["type"] == "raw" { + if _, exists := metaDataMap["endTime"]; !exists { + metaDataMap["endTime"] = DUMMY_TIME } + } - // add accessGroups entry for beamline if creationLocation is defined + // add accessGroups entry for beamline if creationLocation is defined - if value, exists := metaDataMap["creationLocation"]; exists { - var parts = strings.Split(value.(string), "/") - var groups []string - if len(parts) == 4 { - newGroup := strings.ToLower(parts[2]) + strings.ToLower(parts[3]) + if value, exists := metaDataMap["creationLocation"]; exists { + var parts = strings.Split(value.(string), "/") + var groups []string + if len(parts) == 4 { + newGroup := strings.ToLower(parts[2]) + strings.ToLower(parts[3]) - if ag, exists := metaDataMap["accessGroups"]; exists { - // a direct typecast does not work, this loop is needed - aInterface := ag.([]interface{}) - aString := make([]string, len(aInterface)) - for i, v := range aInterface { - aString[i] = v.(string) - } - groups = append(aString, newGroup) - } else { - groups = append(groups, newGroup) + if ag, exists := metaDataMap["accessGroups"]; exists { + // a direct typecast does not work, this loop is needed + aInterface := ag.([]interface{}) + aString := make([]string, len(aInterface)) + for i, v := range aInterface { + aString[i] = v.(string) } + groups = append(aString, newGroup) + } else { + groups = append(groups, newGroup) } - metaDataMap["accessGroups"] = groups } + metaDataMap["accessGroups"] = groups + } - bmm, _ = json.Marshal(metaDataMap) - //fmt.Printf("Marshalled meta data : %s\n", string(bmm)) - // now check validity - req, err := http.NewRequest("POST", myurl, bytes.NewBuffer(bmm)) - if err != nil { - log.Fatal(err) - } - req.Header.Set("Content-Type", "application/json") - resp, err := client.Do(req) - if err != nil { - log.Fatal(err) - } - defer resp.Body.Close() + bmm, err := json.Marshal(metaDataMap) + if err != nil { + return err + } + + req, err := http.NewRequest("POST", myurl, bytes.NewBuffer(bmm)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") - body, _ := ioutil.ReadAll(resp.Body) + resp, err := client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() - // check validity - var respObj interface{} - err = json.Unmarshal(body, &respObj) - if err != nil { - log.Fatal(err) - } - respMap := respObj.(map[string]interface{}) - if respMap["valid"] != true { - log.Fatal("response Body:", string(body)) - } - } else { - log.Fatal("Undefined type field") + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("unexpected status code: %d", resp.StatusCode) } - sourceFolder = "" - if val, ok := metaDataMap["sourceFolder"]; ok { - // turn sourceFolder into canonical form but only for online data /sls/BL/data form - sourceFolder = val.(string) - var parts = strings.Split(val.(string), "/") - if len(parts) > 3 && parts[3] == "data" && parts[1] == "sls" { - sourceFolder, err = filepath.EvalSymlinks(val.(string)) - if err != nil { - log.Fatalf("Failed to find canonical form of sourceFolder:%v %v", val, err) - } - color.Set(color.FgYellow) - log.Printf("Transform sourceFolder %v to canonical form: %v", val, sourceFolder) - color.Unset() - metaDataMap["sourceFolder"] = sourceFolder - } - } else { - log.Fatal("Undefined sourceFolder field") + _, err = ioutil.ReadAll(resp.Body) + if err != nil { + return err } - return metaDataMap, sourceFolder, beamlineAccount + return nil } -func checkIllegalKeys(metadata map[string]interface{}) bool { - for key, value := range metadata { - if containsIllegalCharacters(key) { - return true - } +// getSourceFolder gets the source folder from the metadata. +func getSourceFolder(metaDataMap map[string]interface{}) (string, error) { + sourceFolder := "" + val, ok := metaDataMap["sourceFolder"] + if !ok { + return "", errors.New("undefined sourceFolder field") + } - switch v := value.(type) { - case map[string]interface{}: - if checkIllegalKeys(v) { - return true - } - case []interface{}: - for _, item := range v { - switch itemValue := item.(type) { // Type switch on array item - case map[string]interface{}: - if checkIllegalKeys(itemValue) { - return true - } - // Add other cases if needed - } - } - } - } - return false -} + sourceFolder, ok = val.(string) + if !ok { + return "", errors.New("sourceFolder is not a string") + } -func containsIllegalCharacters(s string) bool { - // Check if the string contains periods, brackets, or other illegal characters - // You can adjust this condition based on your specific requirements - for _, char := range s { - if char == '.' || char == '[' || char == ']' || char == '<' || char == '>' || char == '$' { - return true - } - } - return false -} + parts := strings.Split(sourceFolder, "/") + if len(parts) > 3 && parts[3] == "data" && parts[1] == "sls" { + var err error + sourceFolder, err = filepath.EvalSymlinks(sourceFolder) + if err != nil { + return "", fmt.Errorf("failed to find canonical form of sourceFolder:%v %v", sourceFolder, err) + } + log.Printf("Transform sourceFolder %v to canonical form: %v", val, sourceFolder) + metaDataMap["sourceFolder"] = sourceFolder + } + + return sourceFolder, nil +} \ No newline at end of file diff --git a/datasetIngestor/checkMetadata_test.go b/datasetIngestor/checkMetadata_test.go index d277f2c..a94a6d6 100644 --- a/datasetIngestor/checkMetadata_test.go +++ b/datasetIngestor/checkMetadata_test.go @@ -52,9 +52,11 @@ func TestCheckMetadata(t *testing.T) { // Mock access groups accessGroups := []string{"group1", "group2"} - // Call the function with mock parameters - metaDataMap, sourceFolder, beamlineAccount := CheckMetadata(client, APIServer, metadatafile1, user, accessGroups) + metaDataMap, sourceFolder, beamlineAccount, err := CheckMetadata(client, APIServer, metadatafile1, user, accessGroups) + if err != nil { + t.Error("Error in CheckMetadata function: ", err) + } // Add assertions here based on the expected behavior of the function if len(metaDataMap) == 0 { @@ -98,7 +100,10 @@ func TestCheckMetadata(t *testing.T) { } // test with the second metadata file - metaDataMap2, sourceFolder2, beamlineAccount2 := CheckMetadata(client, APIServer, metadatafile2, user, accessGroups) + metaDataMap2, sourceFolder2, beamlineAccount2, err := CheckMetadata(client, APIServer, metadatafile2, user, accessGroups) + if err != nil { + t.Error("Error in CheckMetadata function: ", err) + } // Add assertions here based on the expected behavior of the function if len(metaDataMap2) == 0 { @@ -119,42 +124,41 @@ func TestCheckMetadata(t *testing.T) { } func TestCheckMetadata_CrashCase(t *testing.T) { - defer func() { - if recover() != nil { - t.Log("Function crashed as expected") - } else { - t.Fatal("Function did not crash as expected") - } - }() - - // Define mock parameters for the function - var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" - var APIServer = TEST_API_SERVER - var metadatafile3 = "testdata/metadata_illegal.json" - - // Mock HTTP client - client := &http.Client{ - Timeout: 5 * time.Second, // Set a timeout for requests - Transport: &http.Transport{ - // Customize the transport settings if needed (e.g., proxy, TLS config) - // For a dummy client, default settings are usually sufficient - }, - CheckRedirect: func(req *http.Request, via []*http.Request) error { - // Customize how redirects are handled if needed - // For a dummy client, default behavior is usually sufficient - return http.ErrUseLastResponse // Use the last response for redirects - }, - } - - // Mock user map - user := map[string]string{ - "displayName": "csaxsswissfel", - "mail": "testuser@example.com", - } - - // Mock access groups - accessGroups := []string{"group1", "group2"} - // Call the function that should crash - CheckMetadata(client, APIServer, metadatafile3, user, accessGroups) + // Define mock parameters for the function + var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // TODO: Test Improvement. Change this to a mock server. At the moment, tests will fail if we change this to a mock server. + var APIServer = TEST_API_SERVER + var metadatafile3 = "testdata/metadata_illegal.json" + + // Mock HTTP client + client := &http.Client{ + Timeout: 5 * time.Second, // Set a timeout for requests + Transport: &http.Transport{ + // Customize the transport settings if needed (e.g., proxy, TLS config) + // For a dummy client, default settings are usually sufficient + }, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + // Customize how redirects are handled if needed + // For a dummy client, default behavior is usually sufficient + return http.ErrUseLastResponse // Use the last response for redirects + }, + } + + // Mock user map + user := map[string]string{ + "displayName": "csaxsswissfel", + "mail": "testuser@example.com", + } + + // Mock access groups + accessGroups := []string{"group1", "group2"} + + // Call the function that should return an error + _, _, _, err := CheckMetadata(client, APIServer, metadatafile3, user, accessGroups) + + // Check that the function returned the expected error + if err == nil { + t.Fatal("Function did not return an error as expected") + } else if err.Error() != ErrIllegalKeys { + t.Errorf("Expected error %q, got %q", ErrIllegalKeys, err.Error()) + } } - diff --git a/datasetIngestor/testdata/metadata-short.json b/datasetIngestor/testdata/metadata-short.json index 4912d85..544ea07 100644 --- a/datasetIngestor/testdata/metadata-short.json +++ b/datasetIngestor/testdata/metadata-short.json @@ -5,5 +5,4 @@ "owner": "first last", "type": "raw", "ownerGroup": "group1" - } diff --git a/datasetIngestor/testdata/metadata.json b/datasetIngestor/testdata/metadata.json index 036b05e..303c22d 100644 --- a/datasetIngestor/testdata/metadata.json +++ b/datasetIngestor/testdata/metadata.json @@ -6,7 +6,6 @@ "ownerEmail": "test@example.com", "ownerGroup": "group1", "principalInvestigator": "test@example.com", - "scientificMetadata": [ { "sample": {