forked from jphcoi/crawtext
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Crawtext.yaml
executable file
·87 lines (82 loc) · 2.28 KB
/
Crawtext.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
######## example_param.init ######################################
##
############## Description of the script itself for the ctManager user, along with its input/output formats ############################
##
# Descriptor of the script: identity metadata plus the declared input and
# output formats. Every key below is nested under `script`; `inputs` and
# `outputs` are each a list of format descriptors (type / categ / structure).
script:
  name: crawl_trial
  desc: "Launch a crawl on the www"
  type: data collection
  author: jp cointet
  tags: [crawl, web]
  # Formats the script accepts.
  inputs:
    - type: text
      categ: records
      structure: isi
  # Formats the script produces.
  outputs:
    - type: sqlite
      categ: biblio
      structure: reseaulu
##
############## Script parameters (used to build the user form and the variables passed back to the script) ############
##
# Parameters of the script. Each entry is a mapping keyed by the parameter
# name and holding an optional `label`, a `type`, and a `default`.
# All active defaults are quoted so they stay strings, matching `type: string`.
params:
  # --- Disabled parameters, kept for reference ---------------------------
  # NOTE(review): the nesting of these commented entries was reconstructed
  # from the shape of the active entries below - confirm before re-enabling.
  # corpus_type:
  #   label: Corpus type
  #   type:
  #     widget: list
  #     multiple: no
  #     choices: ['isi','csv']#, 'pubmed', 'text','scopus']
  #   default: isi
  # year_fiel:
  #   label: 'If your csv file include a year entry, please indicate the column name'
  #   type: string
  #   default: ""
  # corpus_origin_file:
  #   type: string
  #   default: ""
  # output_type:
  #   label: Output Format
  #   type:
  #     widget: list
  #     multiple: no
  #     choices: ['reseaulu']#, 'classique']
  #   default: reseaulu
  # reinit_db:
  #   label: Build a new database or update a previous one ?
  #   type:
  #     widget: list
  #     multiple: no
  #     choices: ['yes','no']
  #   default: 'yes'
  # project_name:
  #   type: string
  #   default: ""

  # --- Active parameters --------------------------------------------------
  inlinks_min:
    label: 'Minimum number of incoming links to consider a webpage'
    type: string
    default: '2'
  depth:
    label: 'Maximum number of corpus extension steps'
    type: string
    default: '5'
  query:
    label: 'Enter your query (boolean operator like "AND" is allowed)'
    type: string
    default: 'You really should enter a query, otherwise...'
  project_name:
    type: string
    default: ""
  project_path:
    type: string
    default: ""
  max_pages_number:
    label: 'Enter the max number of pages to visit (caution: the corpus may be far smaller than this number)'
    type: string
    default: '10000'
  # result_path:

# NOTE(review): the commented lines below look like parameter reads copied
# from the consuming script. Their fallbacks (1 for inlinks_min, 10 for depth)
# differ from the defaults declared above ('2' and '5'), and `result_path`
# has no active declaration - confirm which values are intended.
# inlinks_min=parameters.get('inlinks_min',1)
# depth=parameters.get('depth',10)
# query=parameters.get('query','You really should enter a query, otherwise...')
# result_path=parameters.get('result_path','output')