面试代码

tfidf 计算

image

image

[输入]:
word_list = []
for i in range(len(corpus)):
    word_list.append(corpus[i].split(' '))
print(word_list)
[输出]:
[['this', 'is', 'the', 'first', 'document'],
 ['this', 'is', 'the', 'second', 'second', 'document'],
 ['and', 'the', 'third', 'one'],
 ['is', 'this', 'the', 'first', 'document']]


[输入]:
countlist = []
for i in range(len(word_list)):
    count = Counter(word_list[i])
    countlist.append(count)
countlist
[输出]:
[Counter({'document': 1, 'first': 1, 'is': 1, 'the': 1, 'this': 1}),
 Counter({'document': 1, 'is': 1, 'second': 2, 'the': 1, 'this': 1}),
 Counter({'and': 1, 'one': 1, 'the': 1, 'third': 1}),
 Counter({'document': 1, 'first': 1, 'is': 1, 'the': 1, 'this': 1})]


# word可以通过count得到，count可以通过countlist得到
# count[word]可以得到每个单词的词频， sum(count.values())得到整个句子的单词总数
def tf(word, count):
    return count[word] / sum(count.values())
# 统计的是含有该单词的句子数
def n_containing(word, count_list):
    return sum(1 for count in count_list if word in count)
# len(count_list)是指句子的总数，n_containing(word, count_list)是指含有该单词的句子的总数，加1是为了防止分母为0
def idf(word, count_list):
    return math.log(len(count_list) / (1 + n_containing(word, count_list)))
# 将tf和idf相乘
def tfidf(word, count, count_list):
    return tf(word, count) * idf(word, count_list)

auc计算

在有M个正样本,N个负样本的数据集里。一共有MN对样本（一对样本即，一个正样本与一个负样本）。统计这MN对样本里，正样本的预测概率大于负样本的预测概率的个数。

image

image

ID label pro
A 0 0.1
B 0 0.4
C 1 0.4
D 1 0.8
假设有4条样本。2个正样本，2个负样本，那么M N=4。即总共有4个样本对。分别是：
（D,B）,（D,A）,(C,B),（C,A）。
auc=(1+1+1+0.5)/22=0.875
下面这个代码不能处理pred相等时的情况，但是之前又一次笔试这么写的都过了样例

#include
#include
#include
using namespace std;
struct node {
    int label;
    double pred;
}num[1010];
bool cmp(node x, node y)
{
    return x.pred < y.pred;
}
int main() {
    int n, neg = 0, pos = 0;
    cin >> n;
    for (int i = 0; i < n; i++)
    {
        cin >> num[i].label;
        if (num[i].label == 1)pos++;
        else neg++;
    }
    for (int i = 0; i < n; i++)
        cin >> num[i].pred;
    sort(num, num + n, cmp);
    int cnt = 0, cntsum = 0;
    for (int i = 0; i < n; i++)
    {
        if (num[i].label == 1)
            cntsum += cnt;
        else cnt++;
    }
    cout << cntsum * 1.0 / (pos*neg);
    return 0;
}
/*
4
0 0 1 1
0.1 0.4 0.35 0.8
*/

二叉树：输出根节点到叶子的路径

#include
#include
#include
using namespace std;
struct TreeNode {
    int val;
    TreeNode*right, *left;
};
void dfs(TreeNode*root, vector&path, vector>&ans)
{
    if (root == NULL)return;
    path.push_back(root->val);
    if (!root->left && !root->right) {
        ans.push_back(path);
        return;
    }
    if (root->left)dfs(root, path, ans);
    if (root->right)dfs(root, path, ans);
}
int main() {
    TreeNode*root;
    vectorpath;
    vector>ans;
    dfs(root, path, ans);
}

找到字串中合法匹配的连续对数

`#s = input()`

`s ``=` `'(())(()()()'`

`#s = '(())(()'`

`re ``=` `[]`

`dp ``=` `[``0``]`

`for` `i ``in` `s:`

`if` `not` `re:`

`re.append(i)`

`dp.append(``0``)`

`else``:`

`if` `i``=``=``'('``:`

`re.append(i)`

`dp.append(``0``)`

`else``:`

`if` `re[``-``1``]``=``=``'('``:`

`re.pop()`

`dp.append(dp.pop()``+``1``)`

`else``:`

`re.append(``')'``)`

`dp.append(``0``)`

`print``(re)`

`print``(dp)`

`m ``=` `0`

`cur ``=` `0`

`for` `i ``in` `dp:`

`if` `i!``=``0``:`

`cur``+``=``i`

`m ``=` `max``(cur,m)`

`else``:`

`cur ``=` `0`

`print``(m)`

题目：1－N的自然数中，少了一个，找出这个数

思路：
1、求和的思路。求出1-N的和，减去数列的总和，差即为这个数

public int findLost(int[] a,int n){
int sum = 0;
int sum1 = 0;
for(int i=0;i sum+=a[i];
sum1+=i;
}
int res = sum1+n-sum;
return res;
}
1
2
3
4
5
6
7
8
9
10
2、用异或的方法。任何数异或自己都等于0，任何数异或0都等于他自己。异或两次即可
假如缺的为3。result = 1²4⁵N
第二次异或后 result = 1²4⁵N ¹2³4⁵N
= 0^3 = 3

public int findLost(int[] a,int n){
int result = 0;
for (int i = 0; i < a.length; i++) {
result = result ^ a[i];
}
for (int i = 0; i < n; i++) {
result=result^(i+1);
}
return result;
}

题目：1－N的自然数中，少了两个，找出这个数

输入N，求N的阶乘的末尾有多少个0
思路：n！= 123...n；我们要分析一下0是怎么来的，0是2*5得来的，那也就是说看有多少个2,5就可以了，

     再分析，因子2出现的次数，2,4,6,8...，因子5出现的次数，5,10,15,25...

     很显然，2出现的次数一定是比5出现的次数多的，那么我们只需要计算5出现的次数有多少，就可以得到会有多少个10，也就是会有多少个0了。所以思路就是，遍历1-n，求每个数中因子5的个数，加加加，就ok了。

求完全二叉树的最后一层的最后一个节点

BinaryTreeNode* getLastNode(BinaryTreeNode* root)
{
    if(!root || (!root->left && !root->right))return root;
    int depth = 0;
    BinaryTreeNode* curNode = root;
    while(curNode)//计算二叉树的深度
    {
        depth++;
        curNode = curNode->left;
    }
    int level = 0,tmpDepth = 0;
    while(root)
    {
        level++;//当前遍历到哪一层,跟节点是第一层
        if(level == depth)break;//防止右子树为空
        curNode = root;
        if(curNode->right)
        {
            BinaryTreeNode* pCur = curNode;//当前节点的父节点
            curNode = curNode->right;
            tmpDepth = level + 1;
            while(curNode->left)
            {
                tmpDepth++;
                pCur = curNode;
                curNode = curNode->left;
            }
            if(tmpDepth < depth)root = root -> left;//二分查找左子树
            else if(!pCur->right || pCur->right == curNode)return curNode;
            else root = root->right;//二分查找右子树
        }
        else root = root->left;
    }
    return root;
}

判断是否为完全二叉树

链表快排

ListNode* partition(ListNode*start, ListNode*end)
{
    int num = start->val;
    ListNode*p = start, *q = start->next;
    while (q != end)
    {
        if (q->val < num) {
            p = p->next;
            swap(p->val, q->val);
        }
        q = q->next;
    }
    swap(p->val, start->val);
    return p;
}
void quicksort(ListNode*start, ListNode*end) {
    if (start != end)
    {
        ListNode*index = partition(start, end);
        quicksort(start, index);
        quicksort(index->next, end);
    }
}

不同长度的绳子有不同的价值，一根绳子如何切分可以让总价值最大

https://www.cnblogs.com/tgycoder/p/4980655.html

算法导论钢条切割

int dfs(int p[], int n)
{
    if (n == 0)return 0;
    int profit = 0;
    for (int i = 1; i <= n; i++)
        profit = max(profit, p[i] + dfs(p, n - i));
    return profit;
}
int dfs(int p[], int r[], int n)
{
    if (r[n])return r[n];
    int profit = 0;
    for (int i = 1; i <= n; i++)
        profit = max(profit, p[i] + dfs(p, r, n - i));
    r[n] = profit;
    return profit;
}

dijkstra

int G[maxn][maxn];
int d[maxn];
bool vis[maxn];
void dijkstra(int s)
{
    fill(d, d + maxn, INF);
    d[s] = 0;
    while (1)
    {
        int u = -1, mind = INF;
        for (int i = 0; i < n; i++)
            if (!vis[i] && d[i] < mind)mind = d[i], u = i;
        if (u == -1)break;
        vis[u] = true;
        for (int v = 0; v < n; v++)
        {
            if (!vis[v] && G[u][v] < INF)
                d[v] = min(d[v], d[u] + G[u][v]);
        }
    }
}

数轴上的最长连续线段

struct Interval {
    int start, end;
};
static bool cmp(Interval x, Interval y)
{
    return x.start < y.start;
}
int merge(vectorintervals) {
    vectorans;
    int maxlen = 0;
    sort(intervals.begin(), intervals.end(), cmp);
    ans.push_back(intervals[0]);
    for (int i = 1; i < intervals.size(); i++)
    {
        if (intervals[i].start <= ans.back().end)
        {
            ans.back().start = min(ans.back().start, intervals[i].start);
            ans.back().end = max(ans.back().end, intervals[i].end);
            maxlen = max(maxlen, ans.back().end - ans.back().start - 1);
        }
        else if (intervals[i].start > ans.back().end)
        {
            ans.push_back(intervals[i]);
            maxlen = max(maxlen, intervals[i].start-intervals[i].end);
        }
    }
    return maxlen;
}

矩阵中的最短路，有门有钥匙。动态规划加状态向量

一个m*n矩阵，只能往左或往右，矩阵中的数字代表不同的权值，求一条路径，该路径的权值和与所给的target值最接近。

找子串在原串中第一次出现的位置

https://blog.csdn.net/souldak/article/details/11553409

面试代码

tfidf 计算

auc计算

二叉树：输出根节点到叶子的路径

找到字串中合法匹配的连续对数

求完全二叉树的最后一层的最后一个节点

判断是否为完全二叉树

链表快排

不同长度的绳子有不同的价值，一根绳子如何切分可以让总价值最大

dijkstra

数轴上的最长连续线段

矩阵中的最短路，有门有钥匙。动态规划加状态向量

一个m*n矩阵，只能往左或往右，矩阵中的数字代表不同的权值，求一条路径，该路径的权值和与所给的target值最接近。

你可能感兴趣的:(面试代码)