/*
	This file is part of Bolixo.

	Bolixo is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	Bolixo is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with Bolixo.  If not, see <https://www.gnu.org/licenses/>.
*/
/*
	Get RSS feed from site and inject that as public message
	in bolixo rss account for this site.
*/
/*
	NOTE(review): the directives below carry no header name in this copy
	of the file. The names have to be restored from the upstream source
	before this can be compiled.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "xmlflat.h"
#include "bolixo.h"
#include "bolixo.m"
#include "instrument.h"
#include "helper.h"

using namespace std;

/* Debug channels passed to debug_printf() in this file */
static DEBUG_KEY D_GETRSS ("getrss","Protocol to retrieve RSS");
static DEBUG_KEY D_PROTO ("proto","Protocol information");
static DEBUG_KEY D_HEADER ("header","http header");
static DEBUG_KEY D_XML ("xml","Show XML content");

/* Kind of peer attached to a connection. A new HANDLE_INFO starts as TYPE_NONE. */
enum CONNECT_TYPE { TYPE_NONE, TYPE_CONTROL, TYPE_CLIENT, TYPE_WORKER};

/*
	State kept for one connection: the kind of peer (type) and the
	request being assembled (req).
	The constructor only sets type to TYPE_NONE.
*/
struct HANDLE_INFO: public ARRAY_OBJ{
	CONNECT_TYPE type;
	REQUEST_INFO req;
	HANDLE_INFO(){
		type = TYPE_NONE;
	}
};
#include "proto/rssd_control.protoh"
#include "proto/bod_client.protodef"
/*
	One <command>_NOTNEED switch per bod_client command.
	NOTE(review): presumably each switch tells proto/bod_client.protoch
	(included after this list) to leave out the code for a command this
	program does not call -- confirm against the protocol generator.
*/
#define bod_client_login_NOTNEED
#define bod_client_logout_NOTNEED
#define bod_client_createsession_NOTNEED
#define bod_client_adduser_NOTNEED
#define bod_client_confirmuser_NOTNEED
#define bod_client_deleteuser_NOTNEED
#define bod_client_confirmdelete_NOTNEED
#define bod_client_addfile_NOTNEED
#define bod_client_addfile_bob_NOTNEED
#define bod_client_appendfile_NOTNEED
#define bod_client_delfile_NOTNEED
#define bod_client_undelete_NOTNEED
#define bod_client_modifyfile_NOTNEED
#define bod_client_modifyfile_bob_NOTNEED
#define bod_client_rename_NOTNEED
#define bod_client_copy_NOTNEED
#define bod_client_readfile_NOTNEED
#define bod_client_readfile_bob_NOTNEED
#define bod_client_readmore_NOTNEED
#define bod_client_mkdir_NOTNEED
#define bod_client_rmdir_NOTNEED
#define bod_client_listdir_NOTNEED
#define bod_client_stat_NOTNEED
#define bod_client_set_access_NOTNEED
#define bod_client_markview_NOTNEED
#define bod_client_create_group_list_NOTNEED
#define bod_client_create_group_NOTNEED
#define bod_client_set_group_NOTNEED
#define bod_client_set_member_NOTNEED
#define bod_client_set_list_desc_NOTNEED
#define bod_client_set_group_desc_NOTNEED
#define bod_client_delete_list_NOTNEED
#define bod_client_delete_group_NOTNEED
#define bod_client_list_lists_NOTNEED
#define bod_client_list_groups_NOTNEED
#define bod_client_create_project_dir_NOTNEED
#define bod_client_list_contacts_NOTNEED
#define bod_client_list_inboxes_NOTNEED
#define bod_client_list_msgs_NOTNEED
#define bod_client_sendmsg_NOTNEED
#define bod_client_sendmsg_project_NOTNEED
#define bod_client_replymsg_NOTNEED
#define bod_client_replymsg_project_NOTNEED
#define bod_client_sendattach_NOTNEED
#define bod_client_verifysign_NOTNEED
#define bod_client_getpubkey_NOTNEED
#define bod_client_registernode_NOTNEED
#define bod_client_remotelogin_NOTNEED
#define bod_client_remotepass_NOTNEED
#define bod_client_remote_interest_set_NOTNEED
#define bod_client_remote_interest_unset_NOTNEED
#define bod_client_nodelogin_NOTNEED
#define bod_client_nodepass_NOTNEED
#define bod_client_sendtalk_anon_NOTNEED
#define bod_client_sendtalk_file_NOTNEED
#define bod_client_list_talk_NOTNEED
#define bod_client_contact_request_NOTNEED
#define bod_client_contact_manage_NOTNEED
#define bod_client_contact_list_NOTNEED
#define bod_client_config_read_NOTNEED
#define bod_client_config_write_NOTNEED
#define bod_client_public_checkuser_NOTNEED
#define bod_client_public_listdir_NOTNEED
#define bod_client_public_readfile_NOTNEED
#define bod_client_public_list_talk_NOTNEED
#define bod_client_form_savevar_NOTNEED #define bod_client_form_readvar_NOTNEED #define bod_client_form_deletevar_NOTNEED #define bod_client_form_deleteall_NOTNEED #define bod_client_interest_set_NOTNEED #define bod_client_interest_unset_NOTNEED #define bod_client_interest_list_NOTNEED #define bod_client_interest_check_NOTNEED #define bod_client_systempubkey_NOTNEED #define bod_client_systemsign_NOTNEED #define bod_client_info_read_NOTNEED #define bod_client_info_write_NOTNEED #define bod_client_get_notification_NOTNEED #define bod_client_set_notification_NOTNEED #include "proto/bod_client.protoch" struct WEBSITE_OPTIONS{ bool remove_a = false; bool remove_p = false; bool remove_div = false; bool remove_img = false; bool remove_iframe = false; bool remove_h6 = false; bool remove_comma = false; bool remove_h = false; WEBSITE_OPTIONS() = default; bool empty() const { return !remove_a && !remove_p && !remove_div && !remove_img && !remove_h6 && !remove_comma && !remove_iframe && !remove_h; } }; struct WEBSITE{ string account; // Bolixo account string rssurl; // Url to retrieve the RSS string siteurl; // Url of the site string country; string state; string city; string lang; string name; // Official name of the site string deptname; // Department name or sub-name string session; // Pre-logged session string mini_photo_url; string photo_url; WEBSITE_OPTIONS options; void clear(){ *this = WEBSITE(); } void printfield (const char *field) const{ const char *p = NULL; string tmp; if (strcmp(field,"account")==0){ p = account.c_str(); }else if (strcmp(field,"rssurl")==0){ p = rssurl.c_str(); }else if (strcmp(field,"siteurl")==0){ p = siteurl.c_str(); }else if (strcmp(field,"lang")==0){ p = lang.c_str(); }else if (strcmp(field,"name")==0){ p = name.c_str(); }else if (strcmp(field,"deptname")==0){ p = deptname.c_str(); }else if (strcmp(field,"country")==0){ p = country.c_str(); }else if (strcmp(field,"state")==0){ p = state.c_str(); }else if (strcmp(field,"city")==0){ p = 
city.c_str(); }else if (strcmp(field,"photo_url")==0){ p = photo_url.c_str(); }else if (strcmp(field,"mini_photo_url")==0){ p = mini_photo_url.c_str(); }else if (strcmp(field,"options")==0){ if (options.remove_a) tmp = "remove_a "; if (options.remove_p) tmp = "remove_p "; if (options.remove_div) tmp += "remove_div "; if (options.remove_img) tmp += "remove_img "; if (options.remove_h6) tmp += "remove_h6 "; if (options.remove_comma) tmp += "remove_comma "; if (options.remove_iframe) tmp += "remove_iframe "; if (options.remove_h) tmp += "remove_h "; p = tmp.c_str(); }else{ tlmp_error ("Unknown field name %s\n",field); } if (p != NULL) printf ("%s\n",p); } }; struct RSSD_CONFIG{ vector websites; void clear(){ websites.clear(); } void readfile(PARAM_STRING file); void readdir(PARAM_STRING file); void read(PARAM_STRING file); void readsessions(PARAM_STRING file); void fillempty(WEBSITE &w) const; }; static constexpr unsigned length(const char *s) { return *s == '\0' ? 0 : 1+length(s+1); } struct CONST_STR{ const char *pt; unsigned len; constexpr CONST_STR(const char *s) :pt(s),len(length(s)){ } }; static bool rssd_compare (PARAM_STRING line, const CONST_STR &word, string &value) { bool ret = false; if (strncmp(line.ptr,word.pt,word.len)==0){ ret = true; value = str_skip(line.ptr+word.len); strip_end (value); } return ret; } static int rssd_ncmp (PARAM_STRING line, const CONST_STR &word) { return strncmp(line.ptr,word.pt,word.len); } static void copyif (string &dst, const string &src) { if (dst.empty()) dst = src; } /* Fill empty fields using a parent account, if found An account name goes like this: rss:some_name[-suffix] Parent accounts has the same prefix as */ void RSSD_CONFIG::fillempty(WEBSITE &w) const { auto pos = w.account.find ('_'); if (pos != string::npos){ string parent_account = w.account.substr(0,pos); for (auto const &p:websites){ if (p.account == parent_account){ copyif (w.siteurl,p.siteurl); copyif (w.country,p.country); copyif (w.state,p.state); copyif 
(w.city,p.city); copyif (w.lang,p.lang); copyif (w.name,p.name); copyif (w.deptname,p.deptname); copyif (w.mini_photo_url,p.mini_photo_url); copyif (w.photo_url,p.photo_url); if (w.options.empty()) w.options = p.options; break; } } } } void RSSD_CONFIG::readfile (PARAM_STRING configfile) { glocal RSSD_CONFIG *config = this; glocal WEBSITE website; (configfile,true); constexpr CONST_STR account("account:"); string next; if (rssd_compare(line,CONST_STR("account:"),next)){ if (glocal.website.account.size() > 0){ glocal.config->fillempty(glocal.website); glocal.config->websites.push_back(move(glocal.website)); glocal.website.clear(); } glocal.website.account = next; }else if (rssd_compare(line,CONST_STR("rssurl:"),glocal.website.rssurl)){ }else if (rssd_compare(line,CONST_STR("siteurl:"),glocal.website.siteurl)){ }else if (rssd_compare(line,CONST_STR("country:"),glocal.website.country)){ }else if (rssd_compare(line,CONST_STR("state:"),glocal.website.state)){ }else if (rssd_compare(line,CONST_STR("city:"),glocal.website.city)){ }else if (rssd_compare(line,CONST_STR("lang:"),glocal.website.lang)){ }else if (rssd_compare(line,CONST_STR("name:"),glocal.website.name)){ }else if (rssd_compare(line,CONST_STR("deptname:"),glocal.website.deptname)){ }else if (rssd_compare(line,CONST_STR("mini-photourl:"),glocal.website.mini_photo_url)){ }else if (rssd_compare(line,CONST_STR("photourl:"),glocal.website.photo_url)){ }else if (rssd_compare(line,CONST_STR("options:"),next)){ vector tb; str_splitline(next,' ',tb); for (auto const &t:tb){ if (t == "remove_a"){ glocal.website.options.remove_a = true; }else if (t == "remove_p"){ glocal.website.options.remove_p = true; }else if (t == "remove_div"){ glocal.website.options.remove_div = true; }else if (t == "remove_img"){ glocal.website.options.remove_img = true; }else if (t == "remove_h6"){ glocal.website.options.remove_h6 = true; }else if (t == "remove_comma"){ glocal.website.options.remove_comma = true; }else if (t == "remove_iframe"){ 
glocal.website.options.remove_iframe = true; }else if (t == "remove_h"){ glocal.website.options.remove_h = true; }else{ tlmp_error (MSG_U(E_IVLDOPT,"Invalid option %s for website %s\n") ,t.c_str(),glocal.website.account.c_str()); } } }else{ const char *pt = str_skip(line); if (pt[0] != '#' && pt[0] != '\0'){ tlmp_error ("Invalid configuration line %d: %s\n",noline+1,line); } } return 0; if (glocal.website.account.size() > 0){ fillempty(glocal.website); websites.push_back(move(glocal.website)); } } void RSSD_CONFIG::readdir (PARAM_STRING configfile) { glocal RSSD_CONFIG *config = this; (configfile); if (file_type(path)==0) glocal.config->readfile(path); } void RSSD_CONFIG::read (PARAM_STRING configfile) { clear(); auto type = file_type(configfile.ptr); if (type == -1){ tlmp_error ("Configuration file %s does not exist\n",configfile.ptr); }else if (type == 0){ readfile(configfile); }else if (type == 1){ readdir (configfile); }else{ tlmp_error ("Configuration path %s is not a file nor a directory\n",configfile.ptr); } } // Read the session ID allocated to each account void RSSD_CONFIG::readsessions (PARAM_STRING file) { glocal RSSD_CONFIG *config = this; (file,true); vector tb; if (str_splitline(line,' ',tb)==2){ for (auto &w:glocal.config->websites){ if (w.account == tb[0]){ w.session = tb[1]; break; } } } return 0; } static int rssd_parseurl (PARAM_STRING purl, string &baseurl, string &hostname, string &file) { int ret = -1; const char *url = purl.ptr; const char *pthost; if (is_start_any_ofnc(url,pthost,"http://","https://")){ const char *ptfile = strchr(pthost,'/'); if (ptfile == NULL){ baseurl = url; hostname = pthost; }else{ file = ptfile; baseurl = string(url,ptfile-url); hostname = string(pthost,ptfile-pthost); } ret = 0; } return ret; } static string url1,url2,state; static bool signal_seen = false; static unsigned signal_count = 0; static void fct_alarm(int) { signal_seen = true; signal_count++; tlmp_error ("Alarm state=%s url1=%s 
url2=%s\n",state.c_str(),url1.c_str(),url2.c_str()); alarm (10); } static int rssd_getrss(PARAM_STRING url, string &rss) { glocal string *rss = &rss; glocal int ret = -1; glocal string newlocation = url.ptr; signal_seen = false; url1 = glocal.newlocation; signal (SIGALRM,fct_alarm); alarm (10); while (glocal.newlocation.size() > 0){ state = "connecting"; url2 = glocal.newlocation; // For alarm glocal bool relocated = false; glocal CONNECT_HTTP_INFO con; glocal unsigned received = 0; glocal bool header_seen = false; string baseurl,hostname,file; if (rssd_parseurl(glocal.newlocation,baseurl,hostname,file)==-1) return -1; glocal.newlocation.clear(); rss.clear(); glocal.con.init (baseurl); debug_printf (D_GETRSS,"host=%s\n",glocal.con.host.c_str()); string buf = string_f ("GET %s HTTP/1.0\r\nhost: %s\r\nUser-Agent: bolixo rssd\r\n",file.c_str(),hostname.c_str()); buf += string_f ("Content-Type: text/html; charset=UTF-8\r\n\r\n"); glocal.con.send (buf.c_str()); debug_printf (D_HEADER,"%s-----\n",buf.c_str()); if (signal_seen) break; state = "reading"; (); int ret = glocal.con.receive (buf,size); if (signal_seen){ tlmp_error ("signal seen\n"); ret = -1; end = true; }else if (ret <= 0){ debug_printf (D_GETRSS,"fill received=%u ret=%d size=%u %d(%s)\n",glocal.received,ret,size,errno,strerror(errno)); ret = 0; }else{ glocal.received += ret; } return ret; int ret = 0; const char *line = (const char *)buf; debug_printf (D_GETRSS,"header_seen=%d process len=%u\n",glocal.header_seen,len); if (glocal.header_seen){ (*glocal.rss) += string(line,len); //if (strcasecmp(line,"")==0){ // glocal.ret = 0; // end = true; ret = len; }else{ // We extract one line of the header const char *endbuf = line+len; const char *pt = line; while (pt < endbuf && *pt != '\n') pt++; debug_printf (D_GETRSS,"header_seen=%d process len=%u %lu %d\n",glocal.header_seen,len,pt-line,pt } if (rss.size() > 0 && glocal.ret == -1){ // Some sites produces bad XML. 
We patch it here auto end_pos = rss.find(""); if (end_pos == string::npos){ auto rss_pos = rss.find(" #define _TLMP_rssd_processxml struct _F_rssd_processxml{ #define _F_rssd_processxml_onepage(x) void x onepage(const string &title, const string &link, const string &pubdate, const string &guid, const string &description, const string &content) virtual _F_rssd_processxml_onepage( )=0; }; static void rssd_processxml(_F_rssd_processxml &c, const string &rss) { xmlDoc *doc = xmlParseDoc((const xmlChar*)rss.c_str()); if (doc != NULL){ xmlNode *cur = xmlDocGetRootElement(doc); glocal string title; glocal string link; glocal string pubdate; glocal string guid; glocal string description; glocal string content; glocal _F_rssd_processxml *c = &c; (cur->xmlChildrenNode,""); debug_printf (D_XML,"parent=%s name=%s path=%s\n",parent,name,path); if (rssd_ncmp(path,CONST_STR("/item/"))==0 || rssd_ncmp(path,CONST_STR("/channel/item/"))==0){ const char *val = (const char*)xmlNodeGetContent(node); if (strcmp(name,"title")==0){ glocal.title = val; }else if (strcmp(name,"link")==0){ //tlmp_error ("link=%s ns=%p %s\n",path,node->ns,val); if (glocal.link.empty()) glocal.link = val; }else if (strcmp(name,"pubDate")==0){ glocal.pubdate = val; }else if (strcmp(name,"date")==0){ glocal.pubdate = val; }else if (strcmp(name,"guid")==0){ glocal.guid = val; }else if (strcmp(name,"description")==0){ glocal.description = val; }else if (strcmp(name,"content")==0){ glocal.content = val; } } if (strcmp(name,"item")==0){ glocal.c->onepage(glocal.title,glocal.link,glocal.pubdate,glocal.guid,glocal.description,glocal.content); glocal.title.clear(); glocal.link.clear(); glocal.pubdate.clear(); glocal.guid.clear(); glocal.description.clear(); glocal.content.clear(); } } } /* Remove some HTML from the RSS description to make it fit better as a message */ static string rssd_formatdesc (const string &description, const WEBSITE_OPTIONS &options) { string ret; ret.reserve(description.size()); const char 
*ptdesc = description.c_str(); bool cdata_seen = false; while (*ptdesc != '\0'){ const char *pt; if (rssd_ncmp(ptdesc,CONST_STR("' && *ptdesc != '\0') ptdesc++; if (*ptdesc == '>') ptdesc++; }else if (rssd_ncmp(ptdesc,CONST_STR("') ptdesc++; }else if (rssd_ncmp(ptdesc,CONST_STR(""))==0){ ptdesc += 7; }else if (rssd_ncmp(ptdesc,CONST_STR(""))==0){ ptdesc += 8; }else if (rssd_ncmp(ptdesc,CONST_STR(""))==0){ ptdesc += 9; }else if (rssd_ncmp(ptdesc,CONST_STR("') ptdesc++; if (ret.size() > 0) ret += '\n'; }else if (rssd_ncmp(ptdesc,CONST_STR("

"))==0){ ptdesc += 3; if (ret.size() > 0) ret += '\n'; }else if (rssd_ncmp(ptdesc,CONST_STR("

"))==0){ ptdesc += 4; }else if (rssd_ncmp(ptdesc,CONST_STR("
"); if (end != nullptr){ ptdesc = end+6; if (*ptdesc == '\n') ptdesc++; }else{ ptdesc += strlen(ptdesc); } }else if (rssd_ncmp(ptdesc,CONST_STR(""); if (end != nullptr){ ptdesc = end+1; if (*ptdesc == '\n') ptdesc++; }else{ ptdesc += strlen(ptdesc); } }else{ const char *pt = strcasestr(ptdesc,"src="); if (pt != nullptr){ pt += 4; if (*pt == '"') pt++; const char *end; if (is_start_any_ofnc(pt,end,"http://","https://")){ while (*end != '\0' && *end > ' ' && *end != '>' && *end != '"') end++; ret += "_IMG=" + string(pt,end-pt) + " "; if (*end == '"') end++; while (*end != '\0' && *end != '>') end++; if (*end == '>') end++; ptdesc = end; } } } }else if (rssd_ncmp(ptdesc,CONST_STR(""); if (end != nullptr){ ptdesc = end+4; if (*ptdesc == '\n') ptdesc++; }else{ ptdesc += strlen(ptdesc); } }else if (rssd_ncmp(ptdesc,CONST_STR(""); if (end != nullptr){ ptdesc = end+4; if (*ptdesc == '\n') ptdesc++; }else{ ptdesc += strlen(ptdesc); } }else if (options.remove_h && is_start_any_ofnc(ptdesc,pt ,"

","

","

","

" ,"

","","","")){ ptdesc = pt; }else if (rssd_ncmp(ptdesc,CONST_STR(""); if (end != nullptr){ ptdesc = end+5; if (*ptdesc == '\n') ptdesc++; }else{ ptdesc += strlen(ptdesc); } }else if (rssd_ncmp(ptdesc,CONST_STR(" const char *end = strstr(ptdesc,""); if (end != nullptr){ ptdesc = end+9; if (*ptdesc == '\n') ptdesc++; }else{ ptdesc += strlen(ptdesc); } }else{ ret += *ptdesc++; } }
	/*
		NOTE(review): the tag matching loop above is kept byte for byte.
		The HTML tags of its string literals are missing from this copy
		of the file and can't be rebuilt from what is left. It has to be
		restored from the upstream source.
	*/
	if (options.remove_comma){
		// After using various remove_ options, we end up with a bunch of commas.
		// Every run of two commas or more is removed.
		while (true){
			auto pos = ret.find(",,");
			if (pos == string::npos){
				break;
			}else{
				unsigned start = pos;
				while (ret[pos] == ',') pos++;
				ret.replace(start,pos-start,"");
			}
		}
		// The literal searched here had been reduced to a single
		// character while 6 were removed at each match. It is put back
		// to the 6 characters entity.
		// NOTE(review): &nbsp; is deduced from the length -- confirm.
		while (true){
			auto pos = ret.find("&nbsp;");
			if (pos == string::npos){
				break;
			}else{
				ret.replace(pos,6,"");
			}
		}
		while (true){
			auto pos = ret.find("\t|");
			if (pos == string::npos){
				break;
			}else{
				ret.replace(pos,2,"");
			}
		}
	}
	// Remove spaces at the end and front
	strip_end (ret);
	// The header and the test of this loop were damaged in this copy
	// ("for (unsigned i=0; i 0) ..."). They are rebuilt from what is left.
	// NOTE(review): the exact test on ret[i] is not known -- confirm.
	for (unsigned i=0; i<ret.size(); i++){
		const char car = ret[i];
		if (car != ' ' && car != '\t' && car != '\n' && car != '\r'){
			if (i > 0) ret.replace (0,i,"");
			break;
		}
	}
	return ret;
}

static const char *savexmldir = "/tmp/savexml";
static const char *savelinkdir = "/var/lib/rssd/savelink";
static const char *tmp_dir = "/tmp";
static const char *force_account = NULL;	// For testing, all messages are sent to this account

/* Counters updated by rssd_process() */
struct PROCESS_STATS{
	unsigned newmsgs=0;
	unsigned keepmsgs=0;
};

/*
	Retrieve the RSS of the web site having the account name and
	process its items.
	msg receives an error message if the account is not configured.
	NOTE(review): only the start of this function is part of this block.
*/
static int rssd_process (CONNECT_INFO &con, RSSD_CONFIG &config, PARAM_STRING name, string &msg, bool verbose, PROCESS_STATS &stats)
{
	glocal bool verbose = verbose;
	glocal PROCESS_STATS *stats = &stats;
	glocal CONNECT_INFO *con = &con;
	int ret = -1;
	bool found = false;
	for (auto &r:config.websites){
		if (r.account == name.ptr){
			found = true;
			glocal WEBSITE *r = &r;
			glocal string rss;
			if (r.session.empty()){
				tlmp_error (MSG_U(E_NOSESSSION,"Session not set for account %s\n"),name.ptr);
				break;
			}
			if (rssd_getrss(r.rssurl,glocal.rss)==-1){
				tlmp_error (MSG_U(E_GETRSS,"Error getting the RSS for account %s\n"),name.ptr);
				ret = 0;	// We have to continue for other accounts.
This is a network // error probably and this will fix itself next time. }else{ if (savexmldir != NULL){ mkdir (savexmldir,0755); (string_f("%s/%s.xml",savexmldir,name.ptr),false); fprintf (fout,"%s",glocal.rss.c_str()); return 0; } // Load the link already process for that site mkdir (savelinkdir,0755); glocal map links; glocal time_t now = time(NULL); string fname = string_f("%s/%s.links",savelinkdir,name.ptr); (fname,true); // Not a problem. this files is created on the fly // Older versions had the date and the link as the key, but it was not reliable. // Now, we only keep the URL. unsigned timestamp = atoi(line); line = str_skipdig(line); line = str_skip(line); if (line[0] != '\0') glocal.links[line] = timestamp; return 0; (glocal.rss); string fulllink = link; strip_end (fulllink); if (!is_start_any_ofnc(fulllink,NONEED,"http://","https://")){ // This is a relative URL, we use the siteurl to make it complete fulllink = glocal.r->siteurl+fulllink; } auto mlink = glocal.links.find(fulllink); if (mlink == glocal.links.end()){ string formatdesc = rssd_formatdesc(description,glocal.r->options); if (glocal.verbose) printf (MSG_U(I_GENMSG,"Create a message for link %s\n"),fulllink.c_str()); glocal string content; glocal.content = string_f("%s\n%s\n%s",title.c_str(),fulllink.c_str(),formatdesc.c_str()); glocal bool success = false; vector empty; BOB_TYPE content(glocal.content.c_str(),glocal.content.size(),false); const char *groupowner = force_account != NULL ? 
force_account : glocal.r->account.c_str(); (*glocal.con,glocal.r->session,"",empty,"public",groupowner,content,false,"","",""); if (!success) tlmp_error (MSG_U(E_RSSSENDFAIL,"Can't publish the RSS message: %s\n"),msg); glocal.success = success; if (glocal.success) glocal.links[fulllink] = glocal.now; string textfile = string_f("%s/rss.txt",tmp_dir); (textfile,false); fprintf (fout,"%s",glocal.content.c_str()); return 0; glocal.stats->newmsgs++; }else{ if (glocal.verbose) printf (MSG_U(I_ALREADY,"Old message ignored for link %s\n"),fulllink.c_str()); mlink->second = glocal.now; // Whatever is in the RSS is kept current. glocal.stats->keepmsgs++; } // Save the links (fname,false); // We keep old links around for 30 days time_t old = time(NULL)-(30*24*60*60); for (auto const &s:glocal.links){ if (s.second > old) fprintf (fout,"%u %s\n",s.second,s.first.c_str()); } return 0; ret = 0; } break; } } if (!found) msg = string_f(MSG_U(E_WEBSITENOTFOUND,"RSS account %s not found in configuration"),name.ptr); return ret; } int main (int argc, char *argv[]) { glocal int ret = -1; glocal const char *configfile = "/etc/bolixo/rssd.conf.d"; glocal const char *sessionsfile = "/var/run/sssd-sessions"; glocal const char *control = "/var/run/rssd.sock"; glocal const char *bod_sock = "/dev/bod.sock"; glocal const char *user = "bolixo"; glocal bool daemon = false; glocal bool testmode = false; glocal bool test_summary = false; glocal bool process = false; glocal bool printconf = false; glocal bool printaccounts = false; glocal bool server = false; glocal const char *mysecret = ""; glocal const char *pidfile = "/var/run/rssd.pid"; glocal const char *printfield = NULL; glocal const char *account = NULL; static const char *tbdic[]={"bolixo",NULL}; glocal.ret = (argc,argv,tbdic); setproginfo ("rssd",VERSION,"Convert RSS feeds into bolixo messages"); setarg (' ',"config",MSG_U(O_CONFIGPATH,"Configuration file path"),glocal.configfile,false); setarg (' ',"sessions",MSG_U(O_SESSIONS,"Session 
file"),glocal.sessionsfile,false); setgrouparg ("Networking"); setarg ('c',"control","Unix socket for rssd-control",glocal.control,false); setarg (' ',"bod_sock","Unix socket to reach the bod server",glocal.bod_sock,false); setarg (' ',"mysecret","Secret used to talk with bod",glocal.mysecret,false); setgrouparg ("Directories"); setarg (' ',"savexml","Save retrieved XML file (for review/debug)",savexmldir,false); setarg (' ',"savelink","Save known links about a site",savelinkdir,false); setgrouparg ("Misc."); setarg (' ',"server","Run as a server",glocal.server,false); setarg (' ',"user","Run the program as this user",glocal.user,false); setarg (' ',"daemon","Run in background",glocal.daemon,false); setarg (' ',"pidfile","File holding the PID of the process",glocal.pidfile,false); setgrouparg ("Scripting"); setarg (' ',"printaccounts","Print all accounts id",glocal.printaccounts,false); setarg (' ',"printfield","Print one information about an account",glocal.printfield,false); setarg (' ',"account","Account select for printfield",glocal.account,false); setgrouparg ("Tests"); setarg (' ',"testmode","Execution inline of some part of the rssd server",glocal.testmode,false); setarg (' ',"test_summary","Do not format rss, just print the fields",glocal.test_summary,false); setarg (' ',"process","Process some websites",glocal.process,false); setarg (' ',"printconf","Print configuration",glocal.printconf,false); setarg (' ',"force_account","Send all messages to this account",force_account,false); if (glocal.daemon){ syslog (LOG_ERR,"%s",msg); }else{ fprintf (stderr,"%s",msg); } if (glocal.daemon){ syslog (LOG_WARNING,"%s",msg); }else{ fprintf (stderr,"%s",msg); } // Test mode. 
int ret = -1; if (!glocal.testmode && !glocal.process){ usage(); }else if (glocal.process){ CONNECT_INFO con; // Connection to bod con.port = glocal.bod_sock; con.secret = glocal.mysecret; RSSD_CONFIG config; config.read(glocal.configfile); config.readsessions (glocal.sessionsfile); unsigned nbproc=0; PROCESS_STATS stats; ret = 0; for (int i=0; i(arg,false); glocal.rss += line; return 0; (glocal.rss); printf ("------------------------------------------------------------------------\n"); const char *ptdesc = description.c_str(); const char *pt = strchr(ptdesc,'\n'); string oneline; if (pt == NULL){ oneline = description; }else{ oneline = string(ptdesc,pt-ptdesc); } printf ("title=%s\nlink=%s\npubdate=%s\nguid=%s\ndescription=%s\ncontent=%s\n" ,title.c_str(),link.c_str(),pubdate.c_str(),guid.c_str(),oneline.c_str(),content.c_str()); if (!glocal.test_summary){ printf ("===================\n"); WEBSITE_OPTIONS options; options.remove_a = true; options.remove_p = true; options.remove_h6 = true; options.remove_comma = true; string formatdesc = rssd_formatdesc (description,options); printf ("%s\n%s\n%s",title.c_str(),link.c_str(),formatdesc.c_str()); } } } } return ret; int ret = -1; if (glocal.printconf){ RSSD_CONFIG config; config.read(glocal.configfile); for (auto const &w:config.websites){ printf ("account:%s\n",w.account.c_str()); printf ("\tsiteurl:%s\n",w.siteurl.c_str()); printf ("\trssurl:%s\n",w.rssurl.c_str()); printf ("\tlang:%s\n",w.lang.c_str()); printf ("\tname:%s\n",w.name.c_str()); printf ("\tdeptname:%s\n",w.deptname.c_str()); printf ("\tcountry:%s\n",w.country.c_str()); printf ("\tstate:%s\n",w.state.c_str()); printf ("\tcity:%s\n",w.city.c_str()); printf ("\tphotourl:%s\n",w.photo_url.c_str()); printf ("\tmini-photourl:%s\n",w.mini_photo_url.c_str()); printf ("\toptions:"); w.printfield("options"); } ret = 0; }else if (glocal.printaccounts){ RSSD_CONFIG config; config.read(glocal.configfile); for (auto const &w:config.websites){ if (w.rssurl.size() > 
0){ if (w.siteurl.empty() || w.mini_photo_url.empty() || w.photo_url.empty() || w.country.empty()){ tlmp_error (MSG_U(E_INCOMPLETEACC,"Incomplete RSS account %s\n"),w.account.c_str()); }else{ printf ("%s\n",w.account.c_str()); } } } ret = 0; }else if (glocal.printfield != NULL){ RSSD_CONFIG config; config.read(glocal.configfile); if (glocal.account == NULL){ for (auto const &w:config.websites){ w.printfield(glocal.printfield); } }else{ const auto &w = find_if(config.websites.begin(),config.websites.end(),[this](const auto &w){ return w.account == glocal.account; }); if (w != config.websites.end()){ w->printfield(glocal.printfield); } } }else if (glocal.server){ glocal RSSD_CONFIG config; glocal CONNECT_INFO con; // Connection to bod glocal string controlport = string_f("unix:%s",glocal.control); glocal.con.port = glocal.bod_sock; glocal.con.secret = glocal.mysecret; (); HANDLE_INFO *n = new HANDLE_INFO; info.data = n; // tlmp_error ("port=%s control=%s client=%s\n",info.port,glocal.controlport.c_str(),glocal.clientport.c_str()); if (string_cmp(info.port,glocal.controlport)==0){ n->type = TYPE_CONTROL; } debug_printf (D_PROTO,"receive line: %s\n",line); HANDLE_INFO *c = (HANDLE_INFO*)info.data; static const char *tbtype[]={"none","control request","client request", "worker request"}; ERROR_PREFIX prefix ("%s: ",tbtype[c->type]); if (c->type == TYPE_CONTROL){ (this,c->req,line, info.linelen,endserver, endclient, no,c); vector tb; tb.push_back(string_f ("Version %s",VERSION)); rep_status(tb); endserver = true; if (on){ debug_seton(); }else{ debug_setoff(); } debug_setfdebug (filename); // connectto port send = lines:v glocal const char *send = send; glocal vector lines; // We want to test publishd connectivity to the outside (connectto,port,5); sendf ("%s\n",glocal.send); glocal.lines.push_back(line); end = true; glocal.lines.emplace_back(string_f("fail: %s\n",strerror(errno))); rep_help_connect (glocal.lines); bool success = true; string msg; glocal.config.read 
(glocal.configfile); glocal.config.readsessions (glocal.sessionsfile); PROCESS_STATS stats; if (websites.size() == 0){ for (auto const &r:glocal.config.websites){ if (rssd_process(glocal.con,glocal.config,r.account,msg,false,stats)==-1){ success = false; break; } } }else{ for (auto r:websites){ if (rssd_process(glocal.con,glocal.config,r,msg,false,stats)==-1){ success = false; break; } } } rep_process (success,msg); glocal bool bod = false; rep_test (glocal.bod); tlmp_error ("Invalid command: %s\n",line); endclient = true; } bool some_errors = false; if (fdpass_setcontrol(s,glocal.control,glocal.user)==-1){ some_errors = true; } if (!some_errors && s.is_ok()){ s.setrawmode(true); if (glocal.daemon){ daemon_init(glocal.pidfile,glocal.user); } s.loop(); ret = 0; } }else{ usage(); } return ret; return glocal.ret; }