//
// App.swift
// TextParser
//
// Created by pat on 12/3/22.
//
import ArgumentParser
import Foundation
import NaturalLanguage
@main
struct App: ParsableCommand {
@Argument(help: "The text you want to analyze")
var input: [String]
@Flag(help: "Show detected language.")
var detectLanguage = false
@Flag(help: "Prints the input's sentiment valence.")
var sentimentAnalysis = false
@Flag(help: "Shows the stem form of each word.")
var toLemmatize = false
@Flag(help: "Prints alternative words for each word in the input.")
var alternatives = false
@Flag(help: "Prints names of people, places, and organizations in the input.")
var names = false
mutating func run() {
if detectLanguage == false && sentimentAnalysis == false && toLemmatize == false && alternatives == false && names == false {
detectLanguage = true
sentimentAnalysis = true
toLemmatize = true
alternatives = true
names = true
}
print("◊ Starting TextParser now. ◊")
let text = input.joined(separator: " ")
if detectLanguage {
let language = NLLanguageRecognizer.dominantLanguage(for: text) ?? .undetermined
print()
print("Detected language: \(language.rawValue)")
print(text)
}
if sentimentAnalysis {
let sentiment: Double = sentiment(for: text)
print()
print("Sentiment analysis: \(sentiment)")
}
if names {
let entities = entities(for: text)
print()
print("Found the following entities:")
for entity in entities { print("\t", entity) }
}
lazy var lemma = lemmatize(string: text)
if toLemmatize {
print()
print("Found the following lemma:")
print("\t", lemma.formatted(.list(type: .and)))
}
if alternatives {
print()
print("Found the following alternatives:")
for word in lemma {
let embeddings = embeddings(for: word)
print("\t\(word): ", embeddings.formatted(.list(type: .and)))
}
}
func sentiment(for string: String) -> Double {
let tagger = NLTagger(tagSchemes: [.sentimentScore])
tagger.string = string
let (sentiment, _) = tagger.tag(at: string.startIndex, unit: .paragraph, scheme: .sentimentScore)
return Double(sentiment?.rawValue ?? "0") ?? 0
}
func embeddings(for word: String) -> [String] {
var results = [String]()
if let embedding = NLEmbedding.wordEmbedding(for: .english) {
let similarWords: [(String, Double)] = embedding.neighbors(for: word, maximumCount: 10)
for word in similarWords {
results.append("\(word.0) has a distance of \(round(word.1 * 100)/100)")
}
}
return results
}
func lemmatize(string: String) -> [String] {
let tagger = NLTagger(tagSchemes: [.lemma])
tagger.string = string
var results = [String]()
tagger.enumerateTags(in: string.startIndex..<string.endIndex, unit: .word, scheme: .lemma) { tag, range in
let stemForm = tag?.rawValue ?? String(string[range]).trimmingCharacters(in: .whitespaces)
if !stemForm.isEmpty {
results.append(stemForm)
}
return true
}
return results
}
func entities(for string: String) -> [String] {
let tagger = NLTagger(tagSchemes: [.nameType])
tagger.string = string
var results = [String]()
tagger.enumerateTags(in: string.startIndex..<string.endIndex, unit: .word, scheme: .nameType, options: .joinNames) { tag, range in
guard let tag = tag else { return true }
let match = String(string[range])
switch tag {
case .organizationName:
results.append("Organization: \(match)")
case .personalName:
results.append("Person: \(match)")
case .placeName:
results.append("Place: \(match)")
default:
break
}
return true
}
return results
}
}
}