// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// The identify_license program tries to identify the license type of an
// unknown license. The file containing the license text is specified on the
// command line. Multiple license files can be analyzed with a single command.
// The type of the license is returned along with the confidence level of the
// match. The confidence level is between 0.0 and 1.0, with 1.0 indicating an
// exact match and 0.0 indicating a complete mismatch. The results are sorted
// by confidence level.
//
// $ identifylicense <license_file> <license_file> ...
// LICENSE2: MIT (confidence: 0.987)
// LICENSE1: BSD-2-Clause (confidence: 0.833)
package main

import (
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"strings"
	//"google3/file/base/go/contrib/walk/walk"
	//"google3/file/base/go/file"
	"io/fs"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"time"

	classifier "github.com/google/licenseclassifier/v2"
	"github.com/google/licenseclassifier/v2/tools/identify_license/backend"
	"github.com/google/licenseclassifier/v2/tools/identify_license/results"
)

// Command-line flags. They are registered at package initialization and hold
// their final values only after flag.Parse has run in main.
var (
	// headers is forwarded to the backend's classification call.
	headers = flag.Bool("headers", false, "match license headers")
	// jsonFname, when non-empty, makes main also write the results as JSON.
	jsonFname = flag.String("json", "", "filename to write JSON output to.")
	// includeText is forwarded to the JSON result builder.
	includeText = flag.Bool("include_text", false, "include the license text in the JSON output")
	// numTasks is forwarded to the backend's classification call.
	numTasks = flag.Int("tasks", 1000, "the number of license scanning tasks running concurrently")
	// timeout is used by main as the deadline of the single context passed to
	// the classification call.
	// NOTE(review): whether the backend applies it per file, as the usage text
	// says, cannot be seen from this file - confirm.
	timeout = flag.Duration("timeout", 24*time.Hour, "timeout before giving up on classifying a file.")
	// tracePhases and traceLicenses are passed through unchanged in the
	// classifier's trace configuration.
	tracePhases   = flag.String("trace_phases", "", "comma-separated list of phases of the license classifier to trace")
	traceLicenses = flag.String("trace_licenses", "", "comma-separated list of licenses for the license classifier to trace")
	// ignorePaths is split on commas and each piece is compiled as a regular
	// expression. A pattern must match an entire file path, or an entire
	// directory base name, for that entry to be ignored.
	ignorePaths = flag.String("ignore_paths_re", "", "comma-separated list of regular expressions that match file paths to ignore")
)

// expandFiles recursively returns a list of files stored in a list of
// directories. If an input is not a directory, it is added to the output list.
func expandFiles(ctx context.Context, paths []string) ([]string, error) { var finalPaths []string ip, err := parseIgnorePaths() if err != nil { return nil, fmt.Errorf("could not parse ignore paths: %v", err) } handleFile := func(path string) { if shouldIgnore(ip, path) { return } finalPaths = append(finalPaths, path) } for _, p := range paths { p, err := filepath.Abs(p) if err != nil { return nil, err } err = filepath.Walk(p, func(path string, info os.FileInfo, err error) error { if err != nil { return err } if info.IsDir() { if shouldIgnore(ip, info.Name()) { return fs.SkipDir } return nil // walk the directory } handleFile(path) return nil }) if err != nil { return nil, err } } return finalPaths, nil } func shouldIgnore(ignorePaths []*regexp.Regexp, path string) bool { for _, r := range ignorePaths { if exactRegexMatch(r, path) { return true } } return false } func exactRegexMatch(r *regexp.Regexp, s string) bool { m := r.FindStringIndex(s) if m == nil { return false } return (m[0] == 0) && (m[1] == len(s)) } func parseIgnorePaths() (out []*regexp.Regexp, err error) { for _, p := range strings.Split(*ignorePaths, ",") { r, err := regexp.Compile(p) if err != nil { return nil, err } out = append(out, r) } return out, nil } // outputJSON writes the output formatted as JSON to a file. func outputJSON(filename *string, res results.LicenseTypes, includeText bool) error { d, err := results.NewJSONResult(res, includeText) if err != nil { return err } fc, err := json.MarshalIndent(d, "", " ") if err != nil { return err } return ioutil.WriteFile(*filename, fc, 0644) } func init() { flag.Usage = func() { fmt.Fprintf(os.Stderr, `Usage: %s ... Identify an unknown license. 
Options: `, filepath.Base(os.Args[0])) flag.PrintDefaults() } } func main() { flag.Parse() be, err := backend.New() if err != nil { log.Fatalf("cannot create license classifier: %v", err) } paths, err := expandFiles(context.Background(), flag.Args()) defer be.Close() be.SetTraceConfiguration( &classifier.TraceConfiguration{ TracePhases: *tracePhases, TraceLicenses: *traceLicenses, }) ctx, cancel := context.WithTimeout(context.Background(), *timeout) defer cancel() if errs := be.ClassifyLicensesWithContext(ctx, *numTasks, paths, *headers); errs != nil { be.Close() for _, err := range errs { log.Printf("classify license failed: %v", err) } log.Fatal("cannot classify licenses") } results := be.GetResults() if len(results) == 0 { log.Fatal("Couldn't classify license(s)") } sort.Sort(results) for _, r := range results { name := r.Name if r.MatchType != "License" && r.MatchType != "Header" { name = fmt.Sprintf("%s:%s", r.MatchType, r.Name) } fmt.Printf("%s %s (variant: %v, confidence: %v, start: %v, end: %v)\n", r.Filename, name, r.Variant, r.Confidence, r.StartLine, r.EndLine) } if len(*jsonFname) > 0 { err = outputJSON(jsonFname, results, *includeText) if err != nil { log.Fatalf("Couldn't write JSON output to file %s: %v", *jsonFname, err) } } }