Skip to content

Commit

Permalink
Merge pull request #230 from github/lingo
Browse files Browse the repository at this point in the history
Switch over to using lingo for language detection
  • Loading branch information
tclem authored Aug 26, 2019
2 parents b7a52b4 + 0d78391 commit c1486db
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 88 deletions.
2 changes: 2 additions & 0 deletions semantic.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ common dependencies
, unix ^>= 2.7.2.2
, proto3-suite
, proto3-wire
, lingo >= 0.1.0.1

common executable-flags
ghc-options: -threaded -rtsopts "-with-rtsopts=-N -A4m -n2m"
Expand Down Expand Up @@ -360,6 +361,7 @@ test-suite test
, Data.Functor.Listable
, Data.Graph.Spec
, Data.Mergeable
, Data.Language.Spec
, Data.Range.Spec
, Data.Scientific.Spec
, Data.Semigroup.App.Spec
Expand Down
2 changes: 1 addition & 1 deletion src/Data/Blob/IO.hs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ readBlobsFromDir :: MonadIO m => FilePath -> m [Blob]
readBlobsFromDir path = liftIO . fmap catMaybes $
findFilesInDir path supportedExts mempty >>= Async.mapConcurrently (readBlobFromFile . fileForPath)

-- | Read all blobs from the Git repo with Language.supportedExts
-- | Read all blobs from a git repo
readBlobsFromGitRepo :: MonadIO m => FilePath -> Git.OID -> [FilePath] -> [FilePath] -> m [Blob]
readBlobsFromGitRepo path oid excludePaths includePaths = liftIO . fmap catMaybes $
Git.lsTree path oid >>= Async.mapConcurrently (blobFromTreeEntry path)
Expand Down
101 changes: 48 additions & 53 deletions src/Data/Language.hs
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
{-# LANGUAGE DeriveAnyClass, DeriveGeneric, KindSignatures #-}
{-# LANGUAGE DeriveAnyClass, DeriveGeneric, KindSignatures, LambdaCase #-}
module Data.Language
( Language (..)
, SLanguage (..)
, extensionsForLanguage
, parseLanguage
, knownLanguage
, languageForFilePath
, pathIsMinified
, languageForType
, supportedExts
, codeNavLanguages
, textToLanguage
, languageToText
) where

import Data.Aeson
import qualified Data.Languages as Lingo
import qualified Data.Text as T
import qualified Data.Map.Strict as Map
import Prologue
import System.FilePath.Posix

Expand Down Expand Up @@ -77,68 +79,61 @@ instance SLanguage 'PHP where

instance FromJSON Language where
parseJSON = withText "Language" $ \l ->
pure $ fromMaybe Unknown (parseLanguage l)

parseLanguage :: Text -> Maybe Language
parseLanguage l = case T.toLower l of
"go" -> Just Go
"haskell" -> Just Haskell
"java" -> Just Java
"javascript" -> Just JavaScript
"json" -> Just JSON
"jsx" -> Just JSX
"markdown" -> Just Markdown
"python" -> Just Python
"ruby" -> Just Ruby
"typescript" -> Just TypeScript
"php" -> Just PHP
_ -> Nothing
pure $ textToLanguage l

-- | Predicate failing on 'Unknown' and passing in all other cases.
knownLanguage :: Language -> Bool
knownLanguage = (/= Unknown)

-- | Returns a Language based on the file extension (including the ".").
languageForType :: String -> Language
languageForType mediaType = case mediaType of
".java" -> Java
".json" -> JSON
".hs" -> Haskell
".md" -> Markdown
".rb" -> Ruby
".go" -> Go
".js" -> JavaScript
".mjs" -> JavaScript
".ts" -> TypeScript
".tsx" -> TSX
".jsx" -> JSX
".py" -> Python
".php" -> PHP
".phpt" -> PHP
_ -> Unknown

extensionsForLanguage :: Language -> [String]
extensionsForLanguage language = case language of
Go -> [".go"]
Haskell -> [".hs"]
JavaScript -> [".js", ".mjs"]
PHP -> [".php", ".phpt"]
Python -> [".py"]
Ruby -> [".rb"]
TypeScript -> [".ts"]
TSX -> [".tsx", ".d.tsx"]
JSX -> [".jsx"]
_ -> []

-- | Return a language based on a FilePath's extension, or Nothing if extension is not found or not supported.
extensionsForLanguage language = T.unpack <$> maybe mempty Lingo.languageExtensions (Map.lookup (languageToText language) Lingo.languages)

-- | Return a language based on a FilePath's extension.
languageForFilePath :: FilePath -> Language
languageForFilePath = languageForType . takeExtension
languageForFilePath path = maybe Unknown (textToLanguage . Lingo.languageName) (Lingo.languageForPath path)

supportedExts :: [String]
supportedExts = [".go", ".py", ".rb", ".js", ".mjs", ".ts", ".php", ".phpt"]
supportedExts = foldr append mempty supportedLanguages
where
append (Just l) b = fmap T.unpack (Lingo.languageExtensions l) <> b
append Nothing b = b
supportedLanguages = fmap lookup (languageToText <$> codeNavLanguages)
lookup k = Map.lookup k Lingo.languages

codeNavLanguages :: [Language]
codeNavLanguages = [Go, Ruby, Python, JavaScript, TypeScript, PHP]

pathIsMinified :: FilePath -> Bool
pathIsMinified = isExtensionOf ".min.js"

languageToText :: Language -> T.Text
languageToText = \case
Unknown -> "Unknown"
Go -> "Go"
Haskell -> "Haskell"
Java -> "Java"
JavaScript -> "JavaScript"
JSON -> "JSON"
JSX -> "JSX"
Markdown -> "Markdown"
Python -> "Python"
Ruby -> "Ruby"
TypeScript -> "TypeScript"
TSX -> "TSX"
PHP -> "PHP"

textToLanguage :: T.Text -> Language
textToLanguage = \case
"Go" -> Go
"Haskell" -> Haskell
"Java" -> Java
"JavaScript" -> JavaScript
"JSON" -> JSON
"JSX" -> JSX
"Markdown" -> Markdown
"Python" -> Python
"Ruby" -> Ruby
"TypeScript" -> TypeScript
"TSX" -> TSX
"PHP" -> PHP
_ -> Unknown
33 changes: 1 addition & 32 deletions src/Semantic/Api/Bridge.hs
Original file line number Diff line number Diff line change
Expand Up @@ -64,38 +64,7 @@ instance APIConvert Legacy.Span Data.Span where
fromAPI Legacy.Span {..} = Data.Span <$> (start >>= preview bridging) <*> (end >>= preview bridging)

instance APIBridge T.Text Data.Language where
bridging = iso apiLanguageToLanguage languageToApiLanguage where
languageToApiLanguage :: Data.Language -> T.Text
languageToApiLanguage = \case
Data.Unknown -> "Unknown"
Data.Go -> "Go"
Data.Haskell -> "Haskell"
Data.Java -> "Java"
Data.JavaScript -> "JavaScript"
Data.JSON -> "JSON"
Data.JSX -> "JSX"
Data.Markdown -> "Markdown"
Data.Python -> "Python"
Data.Ruby -> "Ruby"
Data.TypeScript -> "TypeScript"
Data.TSX -> "TSX"
Data.PHP -> "PHP"

apiLanguageToLanguage :: T.Text -> Data.Language
apiLanguageToLanguage = \case
"Go" -> Data.Go
"Haskell" -> Data.Haskell
"Java" -> Data.Java
"JavaScript" -> Data.JavaScript
"JSON" -> Data.JSON
"JSX" -> Data.JSX
"Markdown" -> Data.Markdown
"Python" -> Data.Python
"Ruby" -> Data.Ruby
"TypeScript" -> Data.TypeScript
"TSX" -> Data.TSX
"PHP" -> Data.PHP
_ -> Data.Unknown
bridging = iso Data.textToLanguage Data.languageToText

instance APIBridge API.Blob Data.Blob where
bridging = iso apiBlobToBlob blobToApiBlob where
Expand Down
18 changes: 16 additions & 2 deletions src/Semantic/CLI.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import Control.Exception as Exc (displayException)
import Data.Blob
import Data.Blob.IO
import Data.Handle
import Data.Language (languageForFilePath, parseLanguage)
import qualified Data.Language as Language
import Data.List (intercalate, uncons)
import Data.List.Split (splitWhen)
import Data.Project
Expand Down Expand Up @@ -180,8 +180,22 @@ filePathReader = eitherReader parseFilePath
parseFilePath arg = case splitWhen (== ':') arg of
[a, b] | Just lang <- parseLanguage (T.pack b) -> Right (File a lang)
| Just lang <- parseLanguage (T.pack a) -> Right (File b lang)
[path] -> Right (File path (languageForFilePath path))
[path] -> Right (File path (Language.languageForFilePath path))
_ -> Left ("cannot parse `" <> arg <> "`\nexpecting FILE:LANGUAGE or just FILE")
parseLanguage :: Text -> Maybe Language.Language
parseLanguage l = case T.toLower l of
"go" -> Just Language.Go
"haskell" -> Just Language.Haskell
"java" -> Just Language.Java
"javascript" -> Just Language.JavaScript
"json" -> Just Language.JSON
"jsx" -> Just Language.JSX
"markdown" -> Just Language.Markdown
"python" -> Just Language.Python
"ruby" -> Just Language.Ruby
"typescript" -> Just Language.TypeScript
"php" -> Just Language.PHP
_ -> Nothing

options :: Eq a => [(String, a)] -> Mod OptionFields a -> Parser a
options options fields = option (optionsReader options) (fields <> showDefaultWith (findOption options) <> metavar (intercalate "|" (fmap fst options)))
Expand Down
13 changes: 13 additions & 0 deletions test/Data/Language/Spec.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module Data.Language.Spec (testTree) where

import Data.Language
import Test.Tasty
import Test.Tasty.HUnit

testTree :: TestTree
testTree = testGroup "Data.Language"
[ testCase "supportedExts returns expected list" $
supportedExts @=? [".go",".rb",".builder",".eye",".fcgi",".gemspec",".god",".jbuilder",".mspec",".pluginspec",".podspec",".rabl",".rake",".rbuild",".rbw",".rbx",".ru",".ruby",".spec",".thor",".watchr",".py",".bzl",".cgi",".fcgi",".gyp",".gypi",".lmi",".py3",".pyde",".pyi",".pyp",".pyt",".pyw",".rpy",".spec",".tac",".wsgi",".xpy",".js","._js",".bones",".es",".es6",".frag",".gs",".jake",".jsb",".jscad",".jsfl",".jsm",".jss",".mjs",".njs",".pac",".sjs",".ssjs",".xsjs",".xsjslib",".ts",".php",".aw",".ctp",".fcgi",".inc",".php3",".php4",".php5",".phps",".phpt"]
, testCase "codeNavLanguages returns expected list" $
codeNavLanguages @=? [Go, Ruby, Python, JavaScript, TypeScript, PHP]
]
2 changes: 2 additions & 0 deletions test/Spec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import qualified Data.Abstract.Name.Spec
import qualified Data.Abstract.Path.Spec
import qualified Data.Functor.Classes.Generic.Spec
import qualified Data.Graph.Spec
import qualified Data.Language.Spec
import qualified Data.Range.Spec
import qualified Data.Scientific.Spec
import qualified Data.Semigroup.App.Spec
Expand Down Expand Up @@ -46,6 +47,7 @@ tests :: (?session :: TaskSession) => [TestTree]
tests =
[ Integration.Spec.testTree
, Semantic.CLI.Spec.testTree
, Data.Language.Spec.testTree
, Data.Source.Spec.testTree
, Semantic.Stat.Spec.testTree
]
Expand Down

0 comments on commit c1486db

Please sign in to comment.